Browse Source

Scrypt-nfactor support!

Added new configuration parameter "nfactor", which defaults to 10 (normal scrypt).
Use 11 for vertcoin.

Kernels modified accordingly.
build-mingw
Teemu Suikki 11 years ago
parent
commit
73bb1504e9
  1. 15
      kernel/alexkarnew.cl
  2. 15
      kernel/alexkarold.cl
  3. 17
      kernel/ckolivas.cl
  4. 17
      kernel/psw.cl
  5. 19
      kernel/zuikkis.cl
  6. 1
      miner.h
  7. 11
      ocl.c
  8. 22
      scrypt.c
  9. 4
      sgminer.c

15
kernel/alexkarnew.cl

@ -28,6 +28,11 @@ @@ -28,6 +28,11 @@
* online backup system.
*/
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
#ifndef NFACTOR
#define NFACTOR 1024
#endif
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
__constant uint K[] = {
0x428a2f98U,
@ -761,7 +766,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup) @@ -761,7 +766,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
uint CO=rotl(x,3U);
uint CO_tmp=rotl(xSIZE,3U);
for(uint y=0; y<1024/LOOKUP_GAP; ++y, CO+=CO_tmp)
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y, CO+=CO_tmp)
{
uint CO_reg=CO;
#pragma unroll
@ -775,20 +780,20 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup) @@ -775,20 +780,20 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
{
uint y = (1024/LOOKUP_GAP);
uint y = (NFACTOR/LOOKUP_GAP);
CO=CO_tmp+rotl(y*xSIZE,3U);
#pragma unroll
for(uint z=0; z<zSIZE; ++z, ++CO)
lookup[CO] = X[z];
for(uint i=0; i<1024%LOOKUP_GAP; ++i)
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
salsa(X);
}
#endif
for (uint i=0; i<1024; ++i)
for (uint i=0; i<NFACTOR; ++i)
{
uint4 V[8];
uint j = X[7].x & K[85];
uint j = X[7].x & (NFACTOR-1);
uint y = (j/LOOKUP_GAP);
uint CO_reg=CO_tmp+rotl(xSIZE*y,3U);

15
kernel/alexkarold.cl

@ -28,6 +28,11 @@ @@ -28,6 +28,11 @@
* online backup system.
*/
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
#ifndef NFACTOR
#define NFACTOR 1024
#endif
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
__constant uint K[] = {
0x428a2f98U,
@ -761,7 +766,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup) @@ -761,7 +766,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
uint CO_tmp=xSIZE<<3U;
uint CO_tmp2=x<<3U;
for(uint y=0; y<1024/LOOKUP_GAP; ++y)
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y)
{
uint CO=y*CO_tmp+CO_tmp2;
#pragma unroll
@ -773,19 +778,19 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup) @@ -773,19 +778,19 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
{
uint y = (1024/LOOKUP_GAP);
uint y = (NFACTOR/LOOKUP_GAP);
uint CO=y*CO_tmp+CO_tmp2;
#pragma unroll
for(uint z=0; z<zSIZE; ++z)
lookup[CO] = X[z];
for(uint i=0; i<1024%LOOKUP_GAP; ++i)
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
salsa(X);
}
#endif
for (uint i=0; i<1024; ++i)
for (uint i=0; i<NFACTOR; ++i)
{
uint4 V[8];
uint j = X[7].x & K[85];
uint j = X[7].x & (NFACTOR-1);
uint y = (j/LOOKUP_GAP);
uint CO=y*CO_tmp+CO_tmp2;
#pragma unroll

17
kernel/ckolivas.cl

@ -28,6 +28,11 @@ @@ -28,6 +28,11 @@
* online backup system.
*/
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
#ifndef NFACTOR
#define NFACTOR 1024
#endif
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
__constant uint K[] = {
0x428a2f98U,
@ -759,11 +764,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup) @@ -759,11 +764,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
{
shittify(X);
const uint zSIZE = 8;
const uint ySIZE = (1024/LOOKUP_GAP+(1024%LOOKUP_GAP>0));
const uint ySIZE = (NFACTOR/LOOKUP_GAP+(NFACTOR%LOOKUP_GAP>0));
const uint xSIZE = CONCURRENT_THREADS;
uint x = get_global_id(0)%xSIZE;
for(uint y=0; y<1024/LOOKUP_GAP; ++y)
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y)
{
#pragma unroll
for(uint z=0; z<zSIZE; ++z)
@ -773,18 +778,18 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup) @@ -773,18 +778,18 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
}
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
{
uint y = (1024/LOOKUP_GAP);
uint y = (NFACTOR/LOOKUP_GAP);
#pragma unroll
for(uint z=0; z<zSIZE; ++z)
lookup[CO] = X[z];
for(uint i=0; i<1024%LOOKUP_GAP; ++i)
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
salsa(X);
}
#endif
for (uint i=0; i<1024; ++i)
for (uint i=0; i<NFACTOR; ++i)
{
uint4 V[8];
uint j = X[7].x & K[85];
uint j = X[7].x & (NFACTOR-1);
uint y = (j/LOOKUP_GAP);
#pragma unroll
for(uint z=0; z<zSIZE; ++z)

17
kernel/psw.cl

@ -29,6 +29,11 @@ @@ -29,6 +29,11 @@
* online backup system.
*/
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
#ifndef NFACTOR
#define NFACTOR 1024
#endif
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
__constant uint K[] = {
0x428a2f98U,
@ -698,11 +703,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup) @@ -698,11 +703,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
{
shittify(X);
const uint zSIZE = 8;
const uint ySIZE = (1024/LOOKUP_GAP+(1024%LOOKUP_GAP>0));
const uint ySIZE = (NFACTOR/LOOKUP_GAP+(NFACTOR%LOOKUP_GAP>0));
const uint xSIZE = CONCURRENT_THREADS;
uint x = get_global_id(0)%xSIZE;
for(uint y=0; y<1024/LOOKUP_GAP; ++y)
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y)
{
#pragma unroll
for(uint z=0; z<zSIZE; ++z)
@ -712,18 +717,18 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup) @@ -712,18 +717,18 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
}
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
{
uint y = (1024/LOOKUP_GAP);
uint y = (NFACTOR/LOOKUP_GAP);
#pragma unroll
for(uint z=0; z<zSIZE; ++z)
lookup[CO] = X[z];
for(uint i=0; i<1024%LOOKUP_GAP; ++i)
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
salsa(X);
}
#endif
for (uint i=0; i<1024; ++i)
for (uint i=0; i<NFACTOR; ++i)
{
uint4 V[8];
uint j = X[7].x & K[85];
uint j = X[7].x & (NFACTOR-1);
uint y = (j/LOOKUP_GAP);
#pragma unroll
for(uint z=0; z<zSIZE; ++z)

19
kernel/zuikkis.cl

@ -28,6 +28,11 @@ @@ -28,6 +28,11 @@
* online backup system.
*/
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
#ifndef NFACTOR
#define NFACTOR 1024
#endif
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
__constant uint K[] = {
0x428a2f98U,
@ -759,11 +764,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup) @@ -759,11 +764,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
{
shittify(X);
const uint zSIZE = 8;
const uint ySIZE = (1024/LOOKUP_GAP+(1024%LOOKUP_GAP>0));
const uint ySIZE = (NFACTOR/LOOKUP_GAP+(NFACTOR%LOOKUP_GAP>0));
const uint xSIZE = CONCURRENT_THREADS;
uint x = get_global_id(0)%xSIZE;
for(uint y=0; y<1024/LOOKUP_GAP; ++y)
for(uint y=0; y<(NFACTOR/LOOKUP_GAP); ++y)
{
for(uint z=0; z<zSIZE; ++z)
@ -771,9 +776,9 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup) @@ -771,9 +776,9 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
for(uint i=0; i<LOOKUP_GAP; ++i)
salsa(X);
}
for (uint i=0; i<1024; ++i)
for (uint i=0; i<NFACTOR; ++i)
{
uint j = X[7].x & K[85];
uint j = X[7].x & (NFACTOR-1);
uint y = (j/LOOKUP_GAP);
if (j&1)
@ -823,11 +828,11 @@ const uint4 midstate0, const uint4 midstate16, const uint target) @@ -823,11 +828,11 @@ const uint4 midstate0, const uint4 midstate16, const uint target)
{
pad0 = tstate0;
pad1 = tstate1;
X[i<<1 ] = ostate0;
X[(i<<1)+1] = ostate1;
X[i*2 ] = ostate0;
X[i*2+1] = ostate1;
SHA256(&pad0,&pad1, data, (uint4)(i+1,K[84],0,0), (uint4)(0,0,0,0), (uint4)(0,0,0, K[87]));
SHA256(X+(i<<1),X+(i<<1)+1, pad0, pad1, (uint4)(K[84], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, K[88]));
SHA256(X+i*2,X+i*2+1, pad0, pad1, (uint4)(K[84], 0U, 0U, 0U), (uint4)(0U, 0U, 0U, K[88]));
}
scrypt_core(X,padcache);
SHA256(&tmp0,&tmp1, X[0], X[1], X[2], X[3]);

1
miner.h

@ -1018,6 +1018,7 @@ extern bool fulltest(const unsigned char *hash, const unsigned char *target); @@ -1018,6 +1018,7 @@ extern bool fulltest(const unsigned char *hash, const unsigned char *target);
extern int opt_queue;
extern int opt_scantime;
extern int opt_expiry;
extern int opt_nfactor;
extern cglock_t control_lock;
extern pthread_mutex_t hash_lock;

11
ocl.c

@ -225,6 +225,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize) @@ -225,6 +225,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
cl_uint numPlatforms;
cl_uint numDevices;
cl_int status;
int nfactor = (1<<opt_nfactor);
status = clGetPlatformIDs(0, NULL, &numPlatforms);
if (status != CL_SUCCESS) {
@ -481,7 +482,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize) @@ -481,7 +482,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
if (!cgpu->opt_tc) {
unsigned int sixtyfours;
sixtyfours = cgpu->max_alloc / 131072 / 64 - 1;
sixtyfours = cgpu->max_alloc / 131072 / 64 / (nfactor/1024)- 1;
cgpu->thread_concurrency = sixtyfours * 64;
if (cgpu->shaders && cgpu->thread_concurrency > cgpu->shaders) {
cgpu->thread_concurrency -= cgpu->thread_concurrency % cgpu->shaders;
@ -521,7 +522,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize) @@ -521,7 +522,7 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize)
if (clState->goffset)
strcat(binaryfilename, "g");
sprintf(numbuf, "lg%utc%u", cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency);
sprintf(numbuf, "lg%utc%un%u", cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency,opt_nfactor);
strcat(binaryfilename, numbuf);
sprintf(numbuf, "w%d", (int)clState->wsize);
@ -587,8 +588,8 @@ build: @@ -587,8 +588,8 @@ build:
/* create a cl program executable for all the devices specified */
char *CompilerOptions = (char *)calloc(1, 256);
sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d",
cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (int)clState->wsize);
sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d -D NFACTOR=%d",
cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (int)clState->wsize,(unsigned int)nfactor);
applog(LOG_DEBUG, "Setting worksize to %d", (int)(clState->wsize));
if (clState->vwidth > 1)
@ -777,7 +778,7 @@ built: @@ -777,7 +778,7 @@ built:
return NULL;
}
size_t ipt = (1024 / cgpu->lookup_gap + (1024 % cgpu->lookup_gap > 0));
size_t ipt = (nfactor / cgpu->lookup_gap + (nfactor % cgpu->lookup_gap > 0));
size_t bufsize = 128 * ipt * cgpu->thread_concurrency;
/* Use the max alloc value which has been rounded to a power of

22
scrypt.c

@ -356,7 +356,7 @@ salsa20_8(uint32_t B[16], const uint32_t Bx[16]) @@ -356,7 +356,7 @@ salsa20_8(uint32_t B[16], const uint32_t Bx[16])
/* cpu and memory intensive function to transform a 80 byte buffer into a 32 byte output
scratchpad size needs to be at least 63 + (128 * r * p) + (256 * r + 64) + (128 * r * N) bytes
*/
static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint32_t *ostate)
static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint32_t *ostate, const cl_uint n)
{
uint32_t * V;
uint32_t X[32];
@ -370,7 +370,7 @@ static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint @@ -370,7 +370,7 @@ static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint
PBKDF2_SHA256_80_128(input, X);
for (i = 0; i < 1024; i += 2) {
for (i = 0; i < n; i += 2) {
memcpy(&V[i * 32], X, 128);
salsa20_8(&X[0], &X[16]);
@ -381,8 +381,8 @@ static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint @@ -381,8 +381,8 @@ static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint
salsa20_8(&X[0], &X[16]);
salsa20_8(&X[16], &X[0]);
}
for (i = 0; i < 1024; i += 2) {
j = X[16] & 1023;
for (i = 0; i < n; i += 2) {
j = X[16] & (n-1);
p2 = (uint64_t *)(&V[j * 32]);
for(k = 0; k < 16; k++)
p1[k] ^= p2[k];
@ -390,7 +390,7 @@ static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint @@ -390,7 +390,7 @@ static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint
salsa20_8(&X[0], &X[16]);
salsa20_8(&X[16], &X[0]);
j = X[16] & 1023;
j = X[16] & (n-1);
p2 = (uint64_t *)(&V[j * 32]);
for(k = 0; k < 16; k++)
p1[k] ^= p2[k];
@ -403,7 +403,8 @@ static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint @@ -403,7 +403,8 @@ static void scrypt_1024_1_1_256_sp(const uint32_t* input, char* scratchpad, uint
}
/* 131583 rounded up to 4 byte alignment */
#define SCRATCHBUF_SIZE (131584)
//#define SCRATCHBUF_SIZE (131584)
//#define SCRATCHBUF_SIZE (262207)
void scrypt_regenhash(struct work *work)
{
@ -411,17 +412,19 @@ void scrypt_regenhash(struct work *work) @@ -411,17 +412,19 @@ void scrypt_regenhash(struct work *work)
char *scratchbuf;
uint32_t *nonce = (uint32_t *)(work->data + 76);
uint32_t *ohash = (uint32_t *)(work->hash);
be32enc_vect(data, (const uint32_t *)work->data, 19);
data[19] = htobe32(*nonce);
scratchbuf = (char *)alloca(SCRATCHBUF_SIZE);
scrypt_1024_1_1_256_sp(data, scratchbuf, ohash);
//scratchbuf = alloca(SCRATCHBUF_SIZE);
scratchbuf = (char *)alloca((1 << opt_nfactor) * 128 + 512);
scrypt_1024_1_1_256_sp(data, scratchbuf, ohash, (1 << opt_nfactor));
flip32(ohash, ohash);
}
static const uint32_t diff1targ = 0x0000ffff;
/* Used externally as confirmation of correct OCL code */
/*
int scrypt_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce)
{
uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]);
@ -489,3 +492,4 @@ bool scanhash_scrypt(struct thr_info *thr, const unsigned char __maybe_unused *p @@ -489,3 +492,4 @@ bool scanhash_scrypt(struct thr_info *thr, const unsigned char __maybe_unused *p
free(scratchbuf);;
return ret;
}
*/

4
sgminer.c

@ -92,6 +92,7 @@ int opt_log_interval = 5; @@ -92,6 +92,7 @@ int opt_log_interval = 5;
int opt_queue = 1;
int opt_scantime = 7;
int opt_expiry = 28;
int opt_nfactor = 11;
static const bool opt_time = true;
unsigned long long global_hashrate;
unsigned long global_quota_gcd = 1;
@ -1105,6 +1106,9 @@ static struct opt_table opt_config_table[] = { @@ -1105,6 +1106,9 @@ static struct opt_table opt_config_table[] = {
opt_set_bool, &opt_compact,
"Use compact display without per device statistics"),
#endif
OPT_WITH_ARG("--nfactor",
set_int_0_to_9999, opt_show_intval, &opt_nfactor,
"Set scrypt nfactor, default: 10. Currently use 11 for vertcoin!"),
OPT_WITHOUT_ARG("--debug|-D",
enable_debug, &opt_debug,
"Enable debug output"),

Loading…
Cancel
Save