mirror of
https://github.com/GOSTSec/sgminer
synced 2025-02-01 01:14:22 +00:00
Scrypt-nfactor support!
Added new configuration parameter "nfactor", which defaults to 10 (normal scrypt). Use 11 for vertcoin. Kernels modified accordingly.
This commit is contained in:
parent
44aed3570e
commit
73bb1504e9
@ -28,6 +28,11 @@
|
||||
* online backup system.
|
||||
*/
|
||||
|
||||
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
|
||||
#ifndef NFACTOR
|
||||
#define NFACTOR 1024
|
||||
#endif
|
||||
|
||||
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
|
||||
__constant uint K[] = {
|
||||
0x428a2f98U,
|
||||
@ -761,7 +766,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
uint CO=rotl(x,3U);
|
||||
uint CO_tmp=rotl(xSIZE,3U);
|
||||
|
||||
for(uint y=0; y<1024/LOOKUP_GAP; ++y, CO+=CO_tmp)
|
||||
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y, CO+=CO_tmp)
|
||||
{
|
||||
uint CO_reg=CO;
|
||||
#pragma unroll
|
||||
@ -775,20 +780,20 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
|
||||
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
|
||||
{
|
||||
uint y = (1024/LOOKUP_GAP);
|
||||
uint y = (NFACTOR/LOOKUP_GAP);
|
||||
CO=CO_tmp+rotl(y*xSIZE,3U);
|
||||
#pragma unroll
|
||||
for(uint z=0; z<zSIZE; ++z, ++CO)
|
||||
lookup[CO] = X[z];
|
||||
for(uint i=0; i<1024%LOOKUP_GAP; ++i)
|
||||
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
|
||||
salsa(X);
|
||||
}
|
||||
#endif
|
||||
|
||||
for (uint i=0; i<1024; ++i)
|
||||
for (uint i=0; i<NFACTOR; ++i)
|
||||
{
|
||||
uint4 V[8];
|
||||
uint j = X[7].x & K[85];
|
||||
uint j = X[7].x & (NFACTOR-1);
|
||||
uint y = (j/LOOKUP_GAP);
|
||||
uint CO_reg=CO_tmp+rotl(xSIZE*y,3U);
|
||||
|
||||
|
@ -28,6 +28,11 @@
|
||||
* online backup system.
|
||||
*/
|
||||
|
||||
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
|
||||
#ifndef NFACTOR
|
||||
#define NFACTOR 1024
|
||||
#endif
|
||||
|
||||
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
|
||||
__constant uint K[] = {
|
||||
0x428a2f98U,
|
||||
@ -761,7 +766,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
uint CO_tmp=xSIZE<<3U;
|
||||
uint CO_tmp2=x<<3U;
|
||||
|
||||
for(uint y=0; y<1024/LOOKUP_GAP; ++y)
|
||||
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y)
|
||||
{
|
||||
uint CO=y*CO_tmp+CO_tmp2;
|
||||
#pragma unroll
|
||||
@ -773,19 +778,19 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
|
||||
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
|
||||
{
|
||||
uint y = (1024/LOOKUP_GAP);
|
||||
uint y = (NFACTOR/LOOKUP_GAP);
|
||||
uint CO=y*CO_tmp+CO_tmp2;
|
||||
#pragma unroll
|
||||
for(uint z=0; z<zSIZE; ++z)
|
||||
lookup[CO] = X[z];
|
||||
for(uint i=0; i<1024%LOOKUP_GAP; ++i)
|
||||
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
|
||||
salsa(X);
|
||||
}
|
||||
#endif
|
||||
for (uint i=0; i<1024; ++i)
|
||||
for (uint i=0; i<NFACTOR; ++i)
|
||||
{
|
||||
uint4 V[8];
|
||||
uint j = X[7].x & K[85];
|
||||
uint j = X[7].x & (NFACTOR-1);
|
||||
uint y = (j/LOOKUP_GAP);
|
||||
uint CO=y*CO_tmp+CO_tmp2;
|
||||
#pragma unroll
|
||||
|
@ -28,6 +28,11 @@
|
||||
* online backup system.
|
||||
*/
|
||||
|
||||
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
|
||||
#ifndef NFACTOR
|
||||
#define NFACTOR 1024
|
||||
#endif
|
||||
|
||||
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
|
||||
__constant uint K[] = {
|
||||
0x428a2f98U,
|
||||
@ -759,11 +764,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
{
|
||||
shittify(X);
|
||||
const uint zSIZE = 8;
|
||||
const uint ySIZE = (1024/LOOKUP_GAP+(1024%LOOKUP_GAP>0));
|
||||
const uint ySIZE = (NFACTOR/LOOKUP_GAP+(NFACTOR%LOOKUP_GAP>0));
|
||||
const uint xSIZE = CONCURRENT_THREADS;
|
||||
uint x = get_global_id(0)%xSIZE;
|
||||
|
||||
for(uint y=0; y<1024/LOOKUP_GAP; ++y)
|
||||
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y)
|
||||
{
|
||||
#pragma unroll
|
||||
for(uint z=0; z<zSIZE; ++z)
|
||||
@ -773,18 +778,18 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
}
|
||||
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
|
||||
{
|
||||
uint y = (1024/LOOKUP_GAP);
|
||||
uint y = (NFACTOR/LOOKUP_GAP);
|
||||
#pragma unroll
|
||||
for(uint z=0; z<zSIZE; ++z)
|
||||
lookup[CO] = X[z];
|
||||
for(uint i=0; i<1024%LOOKUP_GAP; ++i)
|
||||
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
|
||||
salsa(X);
|
||||
}
|
||||
#endif
|
||||
for (uint i=0; i<1024; ++i)
|
||||
for (uint i=0; i<NFACTOR; ++i)
|
||||
{
|
||||
uint4 V[8];
|
||||
uint j = X[7].x & K[85];
|
||||
uint j = X[7].x & (NFACTOR-1);
|
||||
uint y = (j/LOOKUP_GAP);
|
||||
#pragma unroll
|
||||
for(uint z=0; z<zSIZE; ++z)
|
||||
|
@ -29,6 +29,11 @@
|
||||
* online backup system.
|
||||
*/
|
||||
|
||||
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
|
||||
#ifndef NFACTOR
|
||||
#define NFACTOR 1024
|
||||
#endif
|
||||
|
||||
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
|
||||
__constant uint K[] = {
|
||||
0x428a2f98U,
|
||||
@ -698,11 +703,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
{
|
||||
shittify(X);
|
||||
const uint zSIZE = 8;
|
||||
const uint ySIZE = (1024/LOOKUP_GAP+(1024%LOOKUP_GAP>0));
|
||||
const uint ySIZE = (NFACTOR/LOOKUP_GAP+(NFACTOR%LOOKUP_GAP>0));
|
||||
const uint xSIZE = CONCURRENT_THREADS;
|
||||
uint x = get_global_id(0)%xSIZE;
|
||||
|
||||
for(uint y=0; y<1024/LOOKUP_GAP; ++y)
|
||||
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y)
|
||||
{
|
||||
#pragma unroll
|
||||
for(uint z=0; z<zSIZE; ++z)
|
||||
@ -712,18 +717,18 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
}
|
||||
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
|
||||
{
|
||||
uint y = (1024/LOOKUP_GAP);
|
||||
uint y = (NFACTOR/LOOKUP_GAP);
|
||||
#pragma unroll
|
||||
for(uint z=0; z<zSIZE; ++z)
|
||||
lookup[CO] = X[z];
|
||||
for(uint i=0; i<1024%LOOKUP_GAP; ++i)
|
||||
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
|
||||
salsa(X);
|
||||
}
|
||||
#endif
|
||||
for (uint i=0; i<1024; ++i)
|
||||
for (uint i=0; i<NFACTOR; ++i)
|
||||
{
|
||||
uint4 V[8];
|
||||
uint j = X[7].x & K[85];
|
||||
uint j = X[7].x & (NFACTOR-1);
|
||||
uint y = (j/LOOKUP_GAP);
|
||||
#pragma unroll
|
||||
for(uint z=0; z<zSIZE; ++z)
|
||||
|
@ -28,6 +28,11 @@
|
||||
* online backup system.
|
||||
*/
|
||||
|
||||
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
|
||||
#ifndef NFACTOR
|
||||
#define NFACTOR 1024
|
||||
#endif
|
||||
|
||||
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
|
||||
__constant uint K[] = {
|
||||
0x428a2f98U,
|
||||
@ -759,11 +764,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
{
|
||||
shittify(X);
|
||||
const uint zSIZE = 8;
|
||||
const uint ySIZE = (1024/LOOKUP_GAP+(1024%LOOKUP_GAP>0));
|
||||
const uint ySIZE = (NFACTOR/LOOKUP_GAP+(NFACTOR%LOOKUP_GAP>0));
|
||||
const uint xSIZE = CONCURRENT_THREADS;
|
||||
uint x = get_global_id(0)%xSIZE;
|
||||
|
||||
for(uint y=0; y<1024/LOOKUP_GAP; ++y)
|
||||
for(uint y=0; y<(NFACTOR/LOOKUP_GAP); ++y)
|
||||
{
|
||||
|
||||
for(uint z=0; z<zSIZE; ++z)
|
||||
@ -771,9 +776,9 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
for(uint i=0; i<LOOKUP_GAP; ++i)
|
||||
salsa(X);
|
||||
}
|
||||
for (uint i=0; i<1024; ++i)
|
||||
for (uint i=0; i<NFACTOR; ++i)
|
||||
{
|
||||
uint j = X[7].x & K[85];
|
||||
uint j = X[7].x & (NFACTOR-1);
|
||||
uint y = (j/LOOKUP_GAP);
|
||||
|
||||
if (j&1)
|
||||
@ -823,11 +828,11 @@ const uint4 midstate0, const uint4 midstate16, const uint target)
|
||||
{
|
||||
pad0 = tstate0;
|
||||
pad1 = tstate1;
|
||||
X[i<<1 ] = ostate0;
|
||||
X[(i<<1)+1] = ostate1;
|
||||
X[i*2 ] = ostate0;
|
||||
X[i*2+1] = ostate1;
|
||||
|
||||
SHA256(&pad0,&pad1, data, (uint4)(i+1,K[84],0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, K[87]));
|
||||
SHA256(X+(i<<1),X+(i<<1)+1, pad0, pad1, (uint4)(K[84], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, K[88]));
|
||||
SHA256(X+i*2,X+i*2+1, pad0, pad1, (uint4)(K[84], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, K[88]));
|
||||
}
|
||||
scrypt_core(X,padcache);
|
||||
SHA256(&tmp0,&tmp1, X[0], X[1], X[2], X[3]);
|
||||
|
1
miner.h
1
miner.h
@ -1018,6 +1018,7 @@ extern bool fulltest(const unsigned char *hash, const unsigned char *target);
|
||||
extern int opt_queue;
|
||||
extern int opt_scantime;
|
||||
extern int opt_expiry;
|
||||
extern int opt_nfactor;
|
||||
|
||||
extern cglock_t control_lock;
|
||||
extern pthread_mutex_t hash_lock;
|
||||
|
11
ocl.c
11
ocl.c
@ -225,6 +225,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
||||
cl_uint numPlatforms;
|
||||
cl_uint numDevices;
|
||||
cl_int status;
|
||||
int nfactor = (1<<opt_nfactor);
|
||||
|
||||
status = clGetPlatformIDs(0, NULL, &numPlatforms);
|
||||
if (status != CL_SUCCESS) {
|
||||
@ -481,7 +482,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
||||
if (!cgpu->opt_tc) {
|
||||
unsigned int sixtyfours;
|
||||
|
||||
sixtyfours = cgpu->max_alloc / 131072 / 64 - 1;
|
||||
sixtyfours = cgpu->max_alloc / 131072 / 64 / (nfactor/1024)- 1;
|
||||
cgpu->thread_concurrency = sixtyfours * 64;
|
||||
if (cgpu->shaders && cgpu->thread_concurrency > cgpu->shaders) {
|
||||
cgpu->thread_concurrency -= cgpu->thread_concurrency % cgpu->shaders;
|
||||
@ -521,7 +522,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
|
||||
if (clState->goffset)
|
||||
strcat(binaryfilename, "g");
|
||||
|
||||
sprintf(numbuf, "lg%utc%u", cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency);
|
||||
sprintf(numbuf, "lg%utc%un%u", cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency,opt_nfactor);
|
||||
strcat(binaryfilename, numbuf);
|
||||
|
||||
sprintf(numbuf, "w%d", (int)clState->wsize);
|
||||
@ -587,8 +588,8 @@ build:
|
||||
/* create a cl program executable for all the devices specified */
|
||||
char *CompilerOptions = (char *)calloc(1, 256);
|
||||
|
||||
sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d",
|
||||
cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (int)clState->wsize);
|
||||
sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d -D NFACTOR=%d",
|
||||
cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (int)clState->wsize,(unsigned int)nfactor);
|
||||
|
||||
applog(LOG_DEBUG, "Setting worksize to %d", (int)(clState->wsize));
|
||||
if (clState->vwidth > 1)
|
||||
@ -777,7 +778,7 @@ built:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
size_t ipt = (1024 / cgpu->lookup_gap + (1024 % cgpu->lookup_gap > 0));
|
||||
size_t ipt = (nfactor / cgpu->lookup_gap + (nfactor % cgpu->lookup_gap > 0));
|
||||
size_t bufsize = 128 * ipt * cgpu->thread_concurrency;
|
||||
|
||||
/* Use the max alloc value which has been rounded to a power of
|
||||
|
22
scrypt.c
22
scrypt.c
@ -356,7 +356,7 @@ salsa20_8(uint32_t B[16], const uint32_t Bx[16])
|
||||
/* cpu and memory intensive function to transform a 80 byte buffer into a 32 byte output
|
||||
scratchpad size needs to be at least 63 + (128 * r * p) + (256 * r + 64) + (128 * r * N) bytes
|
||||
*/
|
||||
static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint32_t *ostate)
|
||||
static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint32_t *ostate, const cl_uint n)
|
||||
{
|
||||
uint32_t * V;
|
||||
uint32_t X[32];
|
||||
@ -370,7 +370,7 @@ static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint
|
||||
|
||||
PBKDF2_SHA256_80_128(input, X);
|
||||
|
||||
for (i = 0; i < 1024; i += 2) {
|
||||
for (i = 0; i < n; i += 2) {
|
||||
memcpy(&V[i * 32], X, 128);
|
||||
|
||||
salsa20_8(&X[0], &X[16]);
|
||||
@ -381,8 +381,8 @@ static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint
|
||||
salsa20_8(&X[0], &X[16]);
|
||||
salsa20_8(&X[16], &X[0]);
|
||||
}
|
||||
for (i = 0; i < 1024; i += 2) {
|
||||
j = X[16] & 1023;
|
||||
for (i = 0; i < n; i += 2) {
|
||||
j = X[16] & (n-1);
|
||||
p2 = (uint64_t *)(&V[j * 32]);
|
||||
for(k = 0; k < 16; k++)
|
||||
p1[k] ^= p2[k];
|
||||
@ -390,7 +390,7 @@ static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint
|
||||
salsa20_8(&X[0], &X[16]);
|
||||
salsa20_8(&X[16], &X[0]);
|
||||
|
||||
j = X[16] & 1023;
|
||||
j = X[16] & (n-1);
|
||||
p2 = (uint64_t *)(&V[j * 32]);
|
||||
for(k = 0; k < 16; k++)
|
||||
p1[k] ^= p2[k];
|
||||
@ -403,7 +403,8 @@ static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint
|
||||
}
|
||||
|
||||
/* 131583 rounded up to 4 byte alignment */
|
||||
#define SCRATCHBUF_SIZE (131584)
|
||||
//#define SCRATCHBUF_SIZE (131584)
|
||||
//#define SCRATCHBUF_SIZE (262207)
|
||||
|
||||
void scrypt_regenhash(struct work *work)
|
||||
{
|
||||
@ -411,17 +412,19 @@ void scrypt_regenhash(struct work *work)
|
||||
char *scratchbuf;
|
||||
uint32_t *nonce = (uint32_t *)(work->data + 76);
|
||||
uint32_t *ohash = (uint32_t *)(work->hash);
|
||||
|
||||
|
||||
be32enc_vect(data, (const uint32_t *)work->data, 19);
|
||||
data[19] = htobe32(*nonce);
|
||||
scratchbuf = (char *)alloca(SCRATCHBUF_SIZE);
|
||||
scrypt_1024_1_1_256_sp(data, scratchbuf, ohash);
|
||||
//scratchbuf = alloca(SCRATCHBUF_SIZE);
|
||||
scratchbuf = (char *)alloca((1 << opt_nfactor) * 128 + 512);
|
||||
scrypt_1024_1_1_256_sp(data, scratchbuf, ohash, (1 << opt_nfactor));
|
||||
flip32(ohash, ohash);
|
||||
}
|
||||
|
||||
static const uint32_t diff1targ = 0x0000ffff;
|
||||
|
||||
/* Used externally as confirmation of correct OCL code */
|
||||
/*
|
||||
int scrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
|
||||
{
|
||||
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
|
||||
@ -489,3 +492,4 @@ bool scanhash_scrypt(struct thr_info *thr, const unsigned char __maybe_unused *p
|
||||
free(scratchbuf);;
|
||||
return ret;
|
||||
}
|
||||
*/
|
||||
|
@ -92,6 +92,7 @@ int opt_log_interval = 5;
|
||||
int opt_queue = 1;
|
||||
int opt_scantime = 7;
|
||||
int opt_expiry = 28;
|
||||
int opt_nfactor = 11;
|
||||
static const bool opt_time = true;
|
||||
unsigned long long global_hashrate;
|
||||
unsigned long global_quota_gcd = 1;
|
||||
@ -1105,6 +1106,9 @@ static struct opt_table opt_config_table[] = {
|
||||
opt_set_bool, &opt_compact,
|
||||
"Use compact display without per device statistics"),
|
||||
#endif
|
||||
OPT_WITH_ARG("--nfactor",
|
||||
set_int_0_to_9999, opt_show_intval, &opt_nfactor,
|
||||
"Set scrypt nfactor, default: 10. Currently use 11 for vertcoin!"),
|
||||
OPT_WITHOUT_ARG("--debug|-D",
|
||||
enable_debug, &opt_debug,
|
||||
"Enable debug output"),
|
||||
|
Loading…
x
Reference in New Issue
Block a user