mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-10 23:08:07 +00:00
Use atomic ops to never miss a nonce on opencl kernels, including nonce==0, also allowing us to make the output buffer smaller.
This commit is contained in:
parent
61df3013a8
commit
86d5377e72
@ -62,7 +62,7 @@ void search(
|
|||||||
const uint c1_plus_k5, const uint b1_plus_k6,
|
const uint c1_plus_k5, const uint b1_plus_k6,
|
||||||
const uint state0, const uint state1, const uint state2, const uint state3,
|
const uint state0, const uint state1, const uint state2, const uint state3,
|
||||||
const uint state4, const uint state5, const uint state6, const uint state7,
|
const uint state4, const uint state5, const uint state6, const uint state7,
|
||||||
__global uint * output)
|
volatile __global uint * output)
|
||||||
{
|
{
|
||||||
|
|
||||||
z ZA[930];
|
z ZA[930];
|
||||||
@ -1242,33 +1242,50 @@ void search(
|
|||||||
|
|
||||||
ZA[924] = (ZCh(ZA[922], ZA[920], ZA[918]) + ZA[923]) + ZR26(ZA[922]);
|
ZA[924] = (ZCh(ZA[922], ZA[920], ZA[918]) + ZA[923]) + ZR26(ZA[922]);
|
||||||
|
|
||||||
#define FOUND (0x800)
|
#define FOUND (0x0F)
|
||||||
#define NFLAG (0x7FF)
|
|
||||||
|
|
||||||
#if defined(VECTORS4)
|
#if defined(VECTORS4)
|
||||||
bool result = any(ZA[924] == 0x136032EDU);
|
bool result = any(ZA[924] == 0x136032EDU);
|
||||||
|
|
||||||
if (result) {
|
if (result) {
|
||||||
if (ZA[924].x == 0x136032EDU)
|
uint found;
|
||||||
output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x;
|
|
||||||
if (ZA[924].y == 0x136032EDU)
|
if (ZA[924].x == 0x136032EDU) {
|
||||||
output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y;
|
found = atomic_add(&output[FOUND], 1);
|
||||||
if (ZA[924].z == 0x136032EDU)
|
output[found] = Znonce.x;
|
||||||
output[FOUND] = output[NFLAG & Znonce.z] = Znonce.z;
|
}
|
||||||
if (ZA[924].w == 0x136032EDU)
|
if (ZA[924].y == 0x136032EDU) {
|
||||||
output[FOUND] = output[NFLAG & Znonce.w] = Znonce.w;
|
found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = Znonce.y;
|
||||||
|
}
|
||||||
|
if (ZA[924].z == 0x136032EDU) {
|
||||||
|
found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = Znonce.z;
|
||||||
|
}
|
||||||
|
if (ZA[924].w == 0x136032EDU) {
|
||||||
|
found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = Znonce.w;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#elif defined(VECTORS2)
|
#elif defined(VECTORS2)
|
||||||
bool result = any(ZA[924] == 0x136032EDU);
|
bool result = any(ZA[924] == 0x136032EDU);
|
||||||
|
|
||||||
if (result) {
|
if (result) {
|
||||||
if (ZA[924].x == 0x136032EDU)
|
uint found;
|
||||||
output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x;
|
|
||||||
if (ZA[924].y == 0x136032EDU)
|
if (ZA[924].x == 0x136032EDU) {
|
||||||
output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y;
|
found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = Znonce.x;
|
||||||
|
}
|
||||||
|
if (ZA[924].y == 0x136032EDU) {
|
||||||
|
found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = Znonce.y;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if (ZA[924] == 0x136032EDU)
|
if (ZA[924] == 0x136032EDU) {
|
||||||
output[FOUND] = output[NFLAG & Znonce] = Znonce;
|
uint found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = Znonce;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -48,7 +48,7 @@ __kernel
|
|||||||
const uint state0A, const uint state0B,
|
const uint state0A, const uint state0B,
|
||||||
const uint state1A, const uint state2A, const uint state3A, const uint state4A,
|
const uint state1A, const uint state2A, const uint state3A, const uint state4A,
|
||||||
const uint state5A, const uint state6A, const uint state7A,
|
const uint state5A, const uint state6A, const uint state7A,
|
||||||
__global uint * output)
|
volatile __global uint * output)
|
||||||
{
|
{
|
||||||
u V[8];
|
u V[8];
|
||||||
u W[16];
|
u W[16];
|
||||||
@ -571,17 +571,46 @@ __kernel
|
|||||||
|
|
||||||
V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
|
V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]);
|
||||||
|
|
||||||
#define FOUND (0x800)
|
#define FOUND (0x0F)
|
||||||
#define NFLAG (0x7FF)
|
|
||||||
|
|
||||||
#ifdef VECTORS4
|
#ifdef VECTORS4
|
||||||
if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU))
|
if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) {
|
||||||
output[FOUND] = output[NFLAG & nonce.x] = (V[7].x == 0x136032edU) ? nonce.x : ((V[7].y == 0x136032edU) ? nonce.y : ((V[7].z == 0x136032edU) ? nonce.z : nonce.w));
|
uint found;
|
||||||
|
|
||||||
|
if (V[7].x == 0x136032edU) {
|
||||||
|
found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = nonce.x;
|
||||||
|
}
|
||||||
|
if (V[7].y == 0x136032edU) {
|
||||||
|
found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = nonce.y;
|
||||||
|
}
|
||||||
|
if (V[7].z == 0x136032edU) {
|
||||||
|
found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = nonce.z;
|
||||||
|
}
|
||||||
|
if (V[7].w == 0x136032edU) {
|
||||||
|
found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = nonce.w;
|
||||||
|
}
|
||||||
|
}
|
||||||
#elif defined VECTORS2
|
#elif defined VECTORS2
|
||||||
if ((V[7].x == 0x136032edU) + (V[7].y == 0x136032edU))
|
if ((V[7].x == 0x136032edU) + (V[7].y == 0x136032edU)) {
|
||||||
output[FOUND] = output[NFLAG & nonce.x] = (V[7].x == 0x136032edU) ? nonce.x : nonce.y;
|
uint found;
|
||||||
|
|
||||||
|
if (V[7].x == 0x136032edU) {
|
||||||
|
found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = nonce.x;
|
||||||
|
}
|
||||||
|
if (V[7].y == 0x136032edU) {
|
||||||
|
found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = nonce.y;
|
||||||
|
}
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
if (V[7] == 0x136032edU)
|
if (V[7] == 0x136032edU) {
|
||||||
output[FOUND] = output[NFLAG & nonce] = nonce;
|
uint found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = nonce;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -1511,7 +1511,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work,
|
|||||||
if (hashes > gpu->max_hashes)
|
if (hashes > gpu->max_hashes)
|
||||||
gpu->max_hashes = hashes;
|
gpu->max_hashes = hashes;
|
||||||
|
|
||||||
/* MAXBUFFERS entry is used as a flag to say nonces exist */
|
/* FOUND entry is used as a counter to say how many nonces exist */
|
||||||
if (thrdata->res[FOUND]) {
|
if (thrdata->res[FOUND]) {
|
||||||
/* Clear the buffer again */
|
/* Clear the buffer again */
|
||||||
status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
|
status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
|
||||||
|
17
findnonce.c
17
findnonce.c
@ -172,6 +172,7 @@ struct pc_data {
|
|||||||
struct work *work;
|
struct work *work;
|
||||||
uint32_t res[MAXBUFFERS];
|
uint32_t res[MAXBUFFERS];
|
||||||
pthread_t pth;
|
pthread_t pth;
|
||||||
|
int found;
|
||||||
};
|
};
|
||||||
|
|
||||||
static void send_sha_nonce(struct pc_data *pcd, cl_uint nonce)
|
static void send_sha_nonce(struct pc_data *pcd, cl_uint nonce)
|
||||||
@ -237,32 +238,22 @@ static void send_scrypt_nonce(struct pc_data *pcd, uint32_t nonce)
|
|||||||
static void *postcalc_hash(void *userdata)
|
static void *postcalc_hash(void *userdata)
|
||||||
{
|
{
|
||||||
struct pc_data *pcd = (struct pc_data *)userdata;
|
struct pc_data *pcd = (struct pc_data *)userdata;
|
||||||
struct thr_info *thr = pcd->thr;
|
unsigned int entry = 0;
|
||||||
int entry = 0, nonces = 0;
|
|
||||||
|
|
||||||
pthread_detach(pthread_self());
|
pthread_detach(pthread_self());
|
||||||
|
|
||||||
for (entry = 0; entry < FOUND; entry++) {
|
for (entry = 0; entry < pcd->res[FOUND]; entry++) {
|
||||||
uint32_t nonce = pcd->res[entry];
|
uint32_t nonce = pcd->res[entry];
|
||||||
|
|
||||||
if (nonce) {
|
applog(LOG_DEBUG, "OCL NONCE %u found in slot %d", nonce, entry);
|
||||||
applog(LOG_DEBUG, "OCL NONCE %u", nonce);
|
|
||||||
if (opt_scrypt)
|
if (opt_scrypt)
|
||||||
send_scrypt_nonce(pcd, nonce);
|
send_scrypt_nonce(pcd, nonce);
|
||||||
else
|
else
|
||||||
send_sha_nonce(pcd, nonce);
|
send_sha_nonce(pcd, nonce);
|
||||||
nonces++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
free(pcd);
|
free(pcd);
|
||||||
|
|
||||||
if (unlikely(!nonces)) {
|
|
||||||
applog(LOG_DEBUG, "No nonces found! Error in OpenCL code?");
|
|
||||||
hw_errors++;
|
|
||||||
thr->cgpu->hw_errors++;
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4,10 +4,9 @@
|
|||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
|
||||||
#define MAXTHREADS (0xFFFFFFFEULL)
|
#define MAXTHREADS (0xFFFFFFFEULL)
|
||||||
#define MAXBUFFERS (0xFFF)
|
#define MAXBUFFERS (0x10)
|
||||||
#define BUFFERSIZE (sizeof(uint32_t) * MAXBUFFERS)
|
#define BUFFERSIZE (sizeof(uint32_t) * MAXBUFFERS)
|
||||||
#define FOUND (0x800)
|
#define FOUND (0x0F)
|
||||||
/* #define NFLAG (0x7FF) Just for reference */
|
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
|
extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
|
||||||
|
162
mkinstalldirs
162
mkinstalldirs
@ -1,162 +0,0 @@
|
|||||||
#! /bin/sh
|
|
||||||
# mkinstalldirs --- make directory hierarchy
|
|
||||||
|
|
||||||
scriptversion=2009-04-28.21; # UTC
|
|
||||||
|
|
||||||
# Original author: Noah Friedman <friedman@prep.ai.mit.edu>
|
|
||||||
# Created: 1993-05-16
|
|
||||||
# Public domain.
|
|
||||||
#
|
|
||||||
# This file is maintained in Automake, please report
|
|
||||||
# bugs to <bug-automake@gnu.org> or send patches to
|
|
||||||
# <automake-patches@gnu.org>.
|
|
||||||
|
|
||||||
nl='
|
|
||||||
'
|
|
||||||
IFS=" "" $nl"
|
|
||||||
errstatus=0
|
|
||||||
dirmode=
|
|
||||||
|
|
||||||
usage="\
|
|
||||||
Usage: mkinstalldirs [-h] [--help] [--version] [-m MODE] DIR ...
|
|
||||||
|
|
||||||
Create each directory DIR (with mode MODE, if specified), including all
|
|
||||||
leading file name components.
|
|
||||||
|
|
||||||
Report bugs to <bug-automake@gnu.org>."
|
|
||||||
|
|
||||||
# process command line arguments
|
|
||||||
while test $# -gt 0 ; do
|
|
||||||
case $1 in
|
|
||||||
-h | --help | --h*) # -h for help
|
|
||||||
echo "$usage"
|
|
||||||
exit $?
|
|
||||||
;;
|
|
||||||
-m) # -m PERM arg
|
|
||||||
shift
|
|
||||||
test $# -eq 0 && { echo "$usage" 1>&2; exit 1; }
|
|
||||||
dirmode=$1
|
|
||||||
shift
|
|
||||||
;;
|
|
||||||
--version)
|
|
||||||
echo "$0 $scriptversion"
|
|
||||||
exit $?
|
|
||||||
;;
|
|
||||||
--) # stop option processing
|
|
||||||
shift
|
|
||||||
break
|
|
||||||
;;
|
|
||||||
-*) # unknown option
|
|
||||||
echo "$usage" 1>&2
|
|
||||||
exit 1
|
|
||||||
;;
|
|
||||||
*) # first non-opt arg
|
|
||||||
break
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
done
|
|
||||||
|
|
||||||
for file
|
|
||||||
do
|
|
||||||
if test -d "$file"; then
|
|
||||||
shift
|
|
||||||
else
|
|
||||||
break
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
case $# in
|
|
||||||
0) exit 0 ;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
# Solaris 8's mkdir -p isn't thread-safe. If you mkdir -p a/b and
|
|
||||||
# mkdir -p a/c at the same time, both will detect that a is missing,
|
|
||||||
# one will create a, then the other will try to create a and die with
|
|
||||||
# a "File exists" error. This is a problem when calling mkinstalldirs
|
|
||||||
# from a parallel make. We use --version in the probe to restrict
|
|
||||||
# ourselves to GNU mkdir, which is thread-safe.
|
|
||||||
case $dirmode in
|
|
||||||
'')
|
|
||||||
if mkdir -p --version . >/dev/null 2>&1 && test ! -d ./--version; then
|
|
||||||
echo "mkdir -p -- $*"
|
|
||||||
exec mkdir -p -- "$@"
|
|
||||||
else
|
|
||||||
# On NextStep and OpenStep, the 'mkdir' command does not
|
|
||||||
# recognize any option. It will interpret all options as
|
|
||||||
# directories to create, and then abort because '.' already
|
|
||||||
# exists.
|
|
||||||
test -d ./-p && rmdir ./-p
|
|
||||||
test -d ./--version && rmdir ./--version
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
if mkdir -m "$dirmode" -p --version . >/dev/null 2>&1 &&
|
|
||||||
test ! -d ./--version; then
|
|
||||||
echo "mkdir -m $dirmode -p -- $*"
|
|
||||||
exec mkdir -m "$dirmode" -p -- "$@"
|
|
||||||
else
|
|
||||||
# Clean up after NextStep and OpenStep mkdir.
|
|
||||||
for d in ./-m ./-p ./--version "./$dirmode";
|
|
||||||
do
|
|
||||||
test -d $d && rmdir $d
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
for file
|
|
||||||
do
|
|
||||||
case $file in
|
|
||||||
/*) pathcomp=/ ;;
|
|
||||||
*) pathcomp= ;;
|
|
||||||
esac
|
|
||||||
oIFS=$IFS
|
|
||||||
IFS=/
|
|
||||||
set fnord $file
|
|
||||||
shift
|
|
||||||
IFS=$oIFS
|
|
||||||
|
|
||||||
for d
|
|
||||||
do
|
|
||||||
test "x$d" = x && continue
|
|
||||||
|
|
||||||
pathcomp=$pathcomp$d
|
|
||||||
case $pathcomp in
|
|
||||||
-*) pathcomp=./$pathcomp ;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
if test ! -d "$pathcomp"; then
|
|
||||||
echo "mkdir $pathcomp"
|
|
||||||
|
|
||||||
mkdir "$pathcomp" || lasterr=$?
|
|
||||||
|
|
||||||
if test ! -d "$pathcomp"; then
|
|
||||||
errstatus=$lasterr
|
|
||||||
else
|
|
||||||
if test ! -z "$dirmode"; then
|
|
||||||
echo "chmod $dirmode $pathcomp"
|
|
||||||
lasterr=
|
|
||||||
chmod "$dirmode" "$pathcomp" || lasterr=$?
|
|
||||||
|
|
||||||
if test ! -z "$lasterr"; then
|
|
||||||
errstatus=$lasterr
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
||||||
pathcomp=$pathcomp/
|
|
||||||
done
|
|
||||||
done
|
|
||||||
|
|
||||||
exit $errstatus
|
|
||||||
|
|
||||||
# Local Variables:
|
|
||||||
# mode: shell-script
|
|
||||||
# sh-indentation: 2
|
|
||||||
# eval: (add-hook 'write-file-hooks 'time-stamp)
|
|
||||||
# time-stamp-start: "scriptversion="
|
|
||||||
# time-stamp-format: "%:y-%02m-%02d.%02H"
|
|
||||||
# time-stamp-time-zone: "UTC"
|
|
||||||
# time-stamp-end: "; # UTC"
|
|
||||||
# End:
|
|
@ -164,7 +164,7 @@ void search( const uint state0, const uint state1, const uint state2, const uint
|
|||||||
const uint PreW18, const uint PreW19,
|
const uint PreW18, const uint PreW19,
|
||||||
const uint PreW31, const uint PreW32,
|
const uint PreW31, const uint PreW32,
|
||||||
|
|
||||||
__global uint * output)
|
volatile __global uint * output)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
||||||
@ -387,31 +387,48 @@ void search( const uint state0, const uint state1, const uint state2, const uint
|
|||||||
W[117] += W[108] + Vals[3] + Vals[7] + P2(124) + P1(124) + Ch((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64),Vals[1],Vals[2]) -
|
W[117] += W[108] + Vals[3] + Vals[7] + P2(124) + P1(124) + Ch((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64),Vals[1],Vals[2]) -
|
||||||
(-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64)));
|
(-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64)));
|
||||||
|
|
||||||
#define FOUND (0x800)
|
#define FOUND (0x0F)
|
||||||
#define NFLAG (0x7FF)
|
|
||||||
|
|
||||||
#ifdef VECTORS4
|
#ifdef VECTORS4
|
||||||
bool result = W[117].x & W[117].y & W[117].z & W[117].w;
|
bool result = W[117].x & W[117].y & W[117].z & W[117].w;
|
||||||
if (!result) {
|
if (!result) {
|
||||||
if (!W[117].x)
|
uint found;
|
||||||
output[FOUND] = output[NFLAG & W[3].x] = W[3].x;
|
|
||||||
if (!W[117].y)
|
if (!W[117].x) {
|
||||||
output[FOUND] = output[NFLAG & W[3].y] = W[3].y;
|
found = atomic_add(&output[FOUND], 1);
|
||||||
if (!W[117].z)
|
output[found] = W[3].x;
|
||||||
output[FOUND] = output[NFLAG & W[3].z] = W[3].z;
|
}
|
||||||
if (!W[117].w)
|
if (!W[117].y) {
|
||||||
output[FOUND] = output[NFLAG & W[3].w] = W[3].w;
|
found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = W[3].y;
|
||||||
|
}
|
||||||
|
if (!W[117].z) {
|
||||||
|
found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = W[3].z;
|
||||||
|
}
|
||||||
|
if (!W[117].w) {
|
||||||
|
found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = W[3].w;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#elif defined VECTORS2
|
#elif defined VECTORS2
|
||||||
bool result = W[117].x & W[117].y;
|
bool result = W[117].x & W[117].y;
|
||||||
if (!result) {
|
if (!result) {
|
||||||
if (!W[117].x)
|
uint found;
|
||||||
output[FOUND] = output[NFLAG & W[3].x] = W[3].x;
|
|
||||||
if (!W[117].y)
|
if (!W[117].x) {
|
||||||
output[FOUND] = output[NFLAG & W[3].y] = W[3].y;
|
found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = W[3].x;
|
||||||
|
}
|
||||||
|
if (!W[117].y) {
|
||||||
|
found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = W[3].y;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if (!W[117])
|
if (!W[117]) {
|
||||||
output[FOUND] = output[NFLAG & W[3]] = W[3];
|
uint found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = W[3];
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -80,7 +80,7 @@ void search(const uint state0, const uint state1, const uint state2, const uint
|
|||||||
const uint D1A, const uint C1addK5, const uint B1addK6,
|
const uint D1A, const uint C1addK5, const uint B1addK6,
|
||||||
const uint W16addK16, const uint W17addK17,
|
const uint W16addK16, const uint W17addK17,
|
||||||
const uint PreVal4addT1, const uint Preval0,
|
const uint PreVal4addT1, const uint Preval0,
|
||||||
__global uint * output)
|
volatile __global uint * output)
|
||||||
{
|
{
|
||||||
u Vals[24];
|
u Vals[24];
|
||||||
u *W = &Vals[8];
|
u *W = &Vals[8];
|
||||||
@ -1311,43 +1311,46 @@ Vals[1]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U));
|
|||||||
Vals[1]+=K[59];
|
Vals[1]+=K[59];
|
||||||
Vals[1]+=Vals[5];
|
Vals[1]+=Vals[5];
|
||||||
|
|
||||||
#define FOUND (0x800)
|
Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
|
||||||
#define NFLAG (0x7FF)
|
Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
|
||||||
|
Vals[2]+=W[12];
|
||||||
|
Vals[2]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
|
||||||
|
Vals[2]+=W[5];
|
||||||
|
Vals[2]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
|
||||||
|
Vals[2]+=Vals[0];
|
||||||
|
Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
|
||||||
|
Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
|
||||||
|
|
||||||
|
#define FOUND (0x0F)
|
||||||
|
|
||||||
#if defined(VECTORS2) || defined(VECTORS4)
|
#if defined(VECTORS2) || defined(VECTORS4)
|
||||||
Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]);
|
|
||||||
Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22));
|
|
||||||
Vals[2]+=W[12];
|
|
||||||
Vals[2]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U));
|
|
||||||
Vals[2]+=W[5];
|
|
||||||
Vals[2]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U));
|
|
||||||
Vals[2]+=Vals[0];
|
|
||||||
Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25));
|
|
||||||
Vals[2]+=ch(Vals[1],Vals[4],Vals[3]);
|
|
||||||
|
|
||||||
if (any(Vals[2] == 0x136032edU)) {
|
if (any(Vals[2] == 0x136032edU)) {
|
||||||
if (Vals[2].x == 0x136032edU)
|
uint found;
|
||||||
output[FOUND] = output[NFLAG & nonce.x] = nonce.x;
|
|
||||||
if (Vals[2].y == 0x136032edU)
|
if (Vals[2].x == 0x136032edU) {
|
||||||
output[FOUND] = output[NFLAG & nonce.y] = nonce.y;
|
found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = nonce.x;
|
||||||
|
}
|
||||||
|
if (Vals[2].y == 0x136032edU) {
|
||||||
|
found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = nonce.y;
|
||||||
|
}
|
||||||
#if defined(VECTORS4)
|
#if defined(VECTORS4)
|
||||||
if (Vals[2].z == 0x136032edU)
|
if (Vals[2].z == 0x136032edU) {
|
||||||
output[FOUND] = output[NFLAG & nonce.z] = nonce.z;
|
found = atomic_add(&output[FOUND], 1);
|
||||||
if (Vals[2].w == 0x136032edU)
|
output[found] = nonce.z;
|
||||||
output[FOUND] = output[NFLAG & nonce.w] = nonce.w;
|
}
|
||||||
|
if (Vals[2].w == 0x136032edU) {
|
||||||
|
found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = nonce.w;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if ((Vals[2]+
|
if (Vals[2] == 0x136032edU) {
|
||||||
Ma(Vals[6],Vals[5],Vals[7])+
|
uint found = atomic_add(&output[FOUND], 1);
|
||||||
(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22))+
|
output[found] = nonce;
|
||||||
W[12]+
|
}
|
||||||
(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U))+
|
|
||||||
W[5]+
|
|
||||||
(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U))+
|
|
||||||
Vals[0]+
|
|
||||||
(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25))+
|
|
||||||
ch(Vals[1],Vals[4],Vals[3])) == 0x136032edU)
|
|
||||||
output[FOUND] = output[NFLAG & nonce] = nonce;
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -682,12 +682,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
|||||||
unshittify(X);
|
unshittify(X);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define FOUND (0x800)
|
#define FOUND (0x0F)
|
||||||
#define NFLAG (0x7FF)
|
|
||||||
|
|
||||||
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
|
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
|
||||||
__kernel void search(__global const uint4 * restrict input,
|
__kernel void search(__global const uint4 * restrict input,
|
||||||
__global uint*restrict output, __global uint4*restrict padcache,
|
volatile __global uint*restrict output, __global uint4*restrict padcache,
|
||||||
const uint4 midstate0, const uint4 midstate16, const uint target)
|
const uint4 midstate0, const uint4 midstate16, const uint target)
|
||||||
{
|
{
|
||||||
uint gid = get_global_id(0);
|
uint gid = get_global_id(0);
|
||||||
@ -721,9 +720,11 @@ const uint4 midstate0, const uint4 midstate16, const uint target)
|
|||||||
SHA256_fixed(&tmp0,&tmp1);
|
SHA256_fixed(&tmp0,&tmp1);
|
||||||
SHA256(&ostate0,&ostate1, tmp0, tmp1, (uint4)(0x80000000U, 0U, 0U, 0U), (uint4)(0U, 0U, 0U, 0x300U));
|
SHA256(&ostate0,&ostate1, tmp0, tmp1, (uint4)(0x80000000U, 0U, 0U, 0U), (uint4)(0U, 0U, 0U, 0x300U));
|
||||||
|
|
||||||
bool found = (EndianSwap(ostate1.w) <= target);
|
bool result = (EndianSwap(ostate1.w) <= target);
|
||||||
if (found)
|
if (result) {
|
||||||
output[FOUND] = output[NFLAG & gid] = gid;
|
uint found = atomic_add(&output[FOUND], 1);
|
||||||
|
output[found] = gid;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*-
|
/*-
|
||||||
|
Loading…
Reference in New Issue
Block a user