mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-31 17:04:23 +00:00
kernel: use N-factor table instead of passing parameter N directly.
This commit is contained in:
parent
3afaaf1bfa
commit
e18bcc2582
@ -28,9 +28,34 @@
|
||||
* online backup system.
|
||||
*/
|
||||
|
||||
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
|
||||
/*
 * N (nfactor), CPU/Memory cost parameter.
 *
 * Lookup table indexed by the NFACTOR exponent: entry k holds 2^k, so
 * N[NFACTOR] yields the iteration/table-size count that scrypt_core uses
 * in place of a directly passed N. The table has 21 entries (indices
 * 0..20, i.e. 2^0 .. 2^20); an NFACTOR above 20 would index past its end.
 * Entry 0 is padding and entry 10 (1024) is the Litecoin scrypt default.
 */
|
||||
__constant uint N[] = {
|
||||
0x00000001U, /* never used, padding */
|
||||
0x00000002U,
|
||||
0x00000004U,
|
||||
0x00000008U,
|
||||
0x00000010U,
|
||||
0x00000020U,
|
||||
0x00000040U,
|
||||
0x00000080U,
|
||||
0x00000100U,
|
||||
0x00000200U,
|
||||
0x00000400U, /* 2^10 == 1024, Litecoin scrypt default */
|
||||
0x00000800U,
|
||||
0x00001000U,
|
||||
0x00002000U,
|
||||
0x00004000U,
|
||||
0x00008000U,
|
||||
0x00010000U,
|
||||
0x00020000U,
|
||||
0x00040000U,
|
||||
0x00080000U,
|
||||
0x00100000U
|
||||
};
|
||||
|
||||
/* Backwards compatibility, if NFACTOR not defined, default to 10 for scrypt */
|
||||
#ifndef NFACTOR
|
||||
#define NFACTOR 1024
|
||||
#define NFACTOR 10
|
||||
#endif
|
||||
|
||||
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
|
||||
@ -766,7 +791,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
uint CO=rotl(x,3U);
|
||||
uint CO_tmp=rotl(xSIZE,3U);
|
||||
|
||||
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y, CO+=CO_tmp)
|
||||
for(uint y=0; y<N[NFACTOR]/LOOKUP_GAP; ++y, CO+=CO_tmp)
|
||||
{
|
||||
uint CO_reg=CO;
|
||||
#pragma unroll
|
||||
@ -780,20 +805,20 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
|
||||
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
|
||||
{
|
||||
uint y = (NFACTOR/LOOKUP_GAP);
|
||||
uint y = (N[NFACTOR]/LOOKUP_GAP);
|
||||
CO=CO_tmp+rotl(y*xSIZE,3U);
|
||||
#pragma unroll
|
||||
for(uint z=0; z<zSIZE; ++z, ++CO)
|
||||
lookup[CO] = X[z];
|
||||
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
|
||||
for(uint i=0; i<N[NFACTOR]%LOOKUP_GAP; ++i)
|
||||
salsa(X);
|
||||
}
|
||||
#endif
|
||||
|
||||
for (uint i=0; i<NFACTOR; ++i)
|
||||
for (uint i=0; i<N[NFACTOR]; ++i)
|
||||
{
|
||||
uint4 V[8];
|
||||
uint j = X[7].x & (NFACTOR-1);
|
||||
uint j = X[7].x & (N[NFACTOR]-1);
|
||||
uint y = (j/LOOKUP_GAP);
|
||||
uint CO_reg=CO_tmp+rotl(xSIZE*y,3U);
|
||||
|
||||
|
@ -28,9 +28,34 @@
|
||||
* online backup system.
|
||||
*/
|
||||
|
||||
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
|
||||
/*
 * N (nfactor), CPU/Memory cost parameter.
 *
 * Lookup table indexed by the NFACTOR exponent: entry k holds 2^k, so
 * N[NFACTOR] yields the iteration/table-size count that scrypt_core uses
 * in place of a directly passed N. The table has 21 entries (indices
 * 0..20, i.e. 2^0 .. 2^20); an NFACTOR above 20 would index past its end.
 * Entry 0 is padding and entry 10 (1024) is the Litecoin scrypt default.
 */
|
||||
__constant uint N[] = {
|
||||
0x00000001U, /* never used, padding */
|
||||
0x00000002U,
|
||||
0x00000004U,
|
||||
0x00000008U,
|
||||
0x00000010U,
|
||||
0x00000020U,
|
||||
0x00000040U,
|
||||
0x00000080U,
|
||||
0x00000100U,
|
||||
0x00000200U,
|
||||
0x00000400U, /* 2^10 == 1024, Litecoin scrypt default */
|
||||
0x00000800U,
|
||||
0x00001000U,
|
||||
0x00002000U,
|
||||
0x00004000U,
|
||||
0x00008000U,
|
||||
0x00010000U,
|
||||
0x00020000U,
|
||||
0x00040000U,
|
||||
0x00080000U,
|
||||
0x00100000U
|
||||
};
|
||||
|
||||
/* Backwards compatibility, if NFACTOR not defined, default to 10 for scrypt */
|
||||
#ifndef NFACTOR
|
||||
#define NFACTOR 1024
|
||||
#define NFACTOR 10
|
||||
#endif
|
||||
|
||||
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
|
||||
@ -766,7 +791,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
uint CO_tmp=xSIZE<<3U;
|
||||
uint CO_tmp2=x<<3U;
|
||||
|
||||
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y)
|
||||
for(uint y=0; y<N[NFACTOR]/LOOKUP_GAP; ++y)
|
||||
{
|
||||
uint CO=y*CO_tmp+CO_tmp2;
|
||||
#pragma unroll
|
||||
@ -778,19 +803,19 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
|
||||
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
|
||||
{
|
||||
uint y = (NFACTOR/LOOKUP_GAP);
|
||||
uint y = (N[NFACTOR]/LOOKUP_GAP);
|
||||
uint CO=y*CO_tmp+CO_tmp2;
|
||||
#pragma unroll
|
||||
for(uint z=0; z<zSIZE; ++z)
|
||||
lookup[CO] = X[z];
|
||||
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
|
||||
for(uint i=0; i<N[NFACTOR]%LOOKUP_GAP; ++i)
|
||||
salsa(X);
|
||||
}
|
||||
#endif
|
||||
for (uint i=0; i<NFACTOR; ++i)
|
||||
for (uint i=0; i<N[NFACTOR]; ++i)
|
||||
{
|
||||
uint4 V[8];
|
||||
uint j = X[7].x & (NFACTOR-1);
|
||||
uint j = X[7].x & (N[NFACTOR]-1);
|
||||
uint y = (j/LOOKUP_GAP);
|
||||
uint CO=y*CO_tmp+CO_tmp2;
|
||||
#pragma unroll
|
||||
|
@ -28,9 +28,34 @@
|
||||
* online backup system.
|
||||
*/
|
||||
|
||||
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
|
||||
/*
 * N (nfactor), CPU/Memory cost parameter.
 *
 * Lookup table indexed by the NFACTOR exponent: entry k holds 2^k, so
 * N[NFACTOR] yields the iteration/table-size count that scrypt_core uses
 * in place of a directly passed N. The table has 21 entries (indices
 * 0..20, i.e. 2^0 .. 2^20); an NFACTOR above 20 would index past its end.
 * Entry 0 is padding and entry 10 (1024) is the Litecoin scrypt default.
 */
|
||||
__constant uint N[] = {
|
||||
0x00000001U, /* never used, padding */
|
||||
0x00000002U,
|
||||
0x00000004U,
|
||||
0x00000008U,
|
||||
0x00000010U,
|
||||
0x00000020U,
|
||||
0x00000040U,
|
||||
0x00000080U,
|
||||
0x00000100U,
|
||||
0x00000200U,
|
||||
0x00000400U, /* 2^10 == 1024, Litecoin scrypt default */
|
||||
0x00000800U,
|
||||
0x00001000U,
|
||||
0x00002000U,
|
||||
0x00004000U,
|
||||
0x00008000U,
|
||||
0x00010000U,
|
||||
0x00020000U,
|
||||
0x00040000U,
|
||||
0x00080000U,
|
||||
0x00100000U
|
||||
};
|
||||
|
||||
/* Backwards compatibility, if NFACTOR not defined, default to 10 for scrypt */
|
||||
#ifndef NFACTOR
|
||||
#define NFACTOR 1024
|
||||
#define NFACTOR 10
|
||||
#endif
|
||||
|
||||
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
|
||||
@ -764,11 +789,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
{
|
||||
shittify(X);
|
||||
const uint zSIZE = 8;
|
||||
const uint ySIZE = (NFACTOR/LOOKUP_GAP+(NFACTOR%LOOKUP_GAP>0));
|
||||
const uint ySIZE = (N[NFACTOR]/LOOKUP_GAP+(N[NFACTOR]%LOOKUP_GAP>0));
|
||||
const uint xSIZE = CONCURRENT_THREADS;
|
||||
uint x = get_global_id(0)%xSIZE;
|
||||
|
||||
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y)
|
||||
for(uint y=0; y<N[NFACTOR]/LOOKUP_GAP; ++y)
|
||||
{
|
||||
#pragma unroll
|
||||
for(uint z=0; z<zSIZE; ++z)
|
||||
@ -778,18 +803,18 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
}
|
||||
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
|
||||
{
|
||||
uint y = (NFACTOR/LOOKUP_GAP);
|
||||
uint y = (N[NFACTOR]/LOOKUP_GAP);
|
||||
#pragma unroll
|
||||
for(uint z=0; z<zSIZE; ++z)
|
||||
lookup[CO] = X[z];
|
||||
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
|
||||
for(uint i=0; i<N[NFACTOR]%LOOKUP_GAP; ++i)
|
||||
salsa(X);
|
||||
}
|
||||
#endif
|
||||
for (uint i=0; i<NFACTOR; ++i)
|
||||
for (uint i=0; i<N[NFACTOR]; ++i)
|
||||
{
|
||||
uint4 V[8];
|
||||
uint j = X[7].x & (NFACTOR-1);
|
||||
uint j = X[7].x & (N[NFACTOR]-1);
|
||||
uint y = (j/LOOKUP_GAP);
|
||||
#pragma unroll
|
||||
for(uint z=0; z<zSIZE; ++z)
|
||||
|
@ -29,9 +29,34 @@
|
||||
* online backup system.
|
||||
*/
|
||||
|
||||
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
|
||||
/*
 * N (nfactor), CPU/Memory cost parameter.
 *
 * Lookup table indexed by the NFACTOR exponent: entry k holds 2^k, so
 * N[NFACTOR] yields the iteration/table-size count that scrypt_core uses
 * in place of a directly passed N. The table has 21 entries (indices
 * 0..20, i.e. 2^0 .. 2^20); an NFACTOR above 20 would index past its end.
 * Entry 0 is padding and entry 10 (1024) is the Litecoin scrypt default.
 */
|
||||
__constant uint N[] = {
|
||||
0x00000001U, /* never used, padding */
|
||||
0x00000002U,
|
||||
0x00000004U,
|
||||
0x00000008U,
|
||||
0x00000010U,
|
||||
0x00000020U,
|
||||
0x00000040U,
|
||||
0x00000080U,
|
||||
0x00000100U,
|
||||
0x00000200U,
|
||||
0x00000400U, /* 2^10 == 1024, Litecoin scrypt default */
|
||||
0x00000800U,
|
||||
0x00001000U,
|
||||
0x00002000U,
|
||||
0x00004000U,
|
||||
0x00008000U,
|
||||
0x00010000U,
|
||||
0x00020000U,
|
||||
0x00040000U,
|
||||
0x00080000U,
|
||||
0x00100000U
|
||||
};
|
||||
|
||||
/* Backwards compatibility, if NFACTOR not defined, default to 10 for scrypt */
|
||||
#ifndef NFACTOR
|
||||
#define NFACTOR 1024
|
||||
#define NFACTOR 10
|
||||
#endif
|
||||
|
||||
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
|
||||
@ -703,11 +728,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
{
|
||||
shittify(X);
|
||||
const uint zSIZE = 8;
|
||||
const uint ySIZE = (NFACTOR/LOOKUP_GAP+(NFACTOR%LOOKUP_GAP>0));
|
||||
const uint ySIZE = (N[NFACTOR]/LOOKUP_GAP+(N[NFACTOR]%LOOKUP_GAP>0));
|
||||
const uint xSIZE = CONCURRENT_THREADS;
|
||||
uint x = get_global_id(0)%xSIZE;
|
||||
|
||||
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y)
|
||||
for(uint y=0; y<N[NFACTOR]/LOOKUP_GAP; ++y)
|
||||
{
|
||||
#pragma unroll
|
||||
for(uint z=0; z<zSIZE; ++z)
|
||||
@ -717,18 +742,18 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
}
|
||||
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
|
||||
{
|
||||
uint y = (NFACTOR/LOOKUP_GAP);
|
||||
uint y = (N[NFACTOR]/LOOKUP_GAP);
|
||||
#pragma unroll
|
||||
for(uint z=0; z<zSIZE; ++z)
|
||||
lookup[CO] = X[z];
|
||||
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
|
||||
for(uint i=0; i<N[NFACTOR]%LOOKUP_GAP; ++i)
|
||||
salsa(X);
|
||||
}
|
||||
#endif
|
||||
for (uint i=0; i<NFACTOR; ++i)
|
||||
for (uint i=0; i<N[NFACTOR]; ++i)
|
||||
{
|
||||
uint4 V[8];
|
||||
uint j = X[7].x & (NFACTOR-1);
|
||||
uint j = X[7].x & (N[NFACTOR]-1);
|
||||
uint y = (j/LOOKUP_GAP);
|
||||
#pragma unroll
|
||||
for(uint z=0; z<zSIZE; ++z)
|
||||
|
@ -28,9 +28,34 @@
|
||||
* online backup system.
|
||||
*/
|
||||
|
||||
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
|
||||
/*
 * N (nfactor), CPU/Memory cost parameter.
 *
 * Lookup table indexed by the NFACTOR exponent: entry k holds 2^k, so
 * N[NFACTOR] yields the iteration/table-size count that scrypt_core uses
 * in place of a directly passed N. The table has 21 entries (indices
 * 0..20, i.e. 2^0 .. 2^20); an NFACTOR above 20 would index past its end.
 * Entry 0 is padding and entry 10 (1024) is the Litecoin scrypt default.
 */
|
||||
__constant uint N[] = {
|
||||
0x00000001U, /* never used, padding */
|
||||
0x00000002U,
|
||||
0x00000004U,
|
||||
0x00000008U,
|
||||
0x00000010U,
|
||||
0x00000020U,
|
||||
0x00000040U,
|
||||
0x00000080U,
|
||||
0x00000100U,
|
||||
0x00000200U,
|
||||
0x00000400U, /* 2^10 == 1024, Litecoin scrypt default */
|
||||
0x00000800U,
|
||||
0x00001000U,
|
||||
0x00002000U,
|
||||
0x00004000U,
|
||||
0x00008000U,
|
||||
0x00010000U,
|
||||
0x00020000U,
|
||||
0x00040000U,
|
||||
0x00080000U,
|
||||
0x00100000U
|
||||
};
|
||||
|
||||
/* Backwards compatibility, if NFACTOR not defined, default to 10 for scrypt */
|
||||
#ifndef NFACTOR
|
||||
#define NFACTOR 1024
|
||||
#define NFACTOR 10
|
||||
#endif
|
||||
|
||||
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
|
||||
@ -764,11 +789,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
{
|
||||
shittify(X);
|
||||
const uint zSIZE = 8;
|
||||
const uint ySIZE = (NFACTOR/LOOKUP_GAP+(NFACTOR%LOOKUP_GAP>0));
|
||||
const uint ySIZE = (N[NFACTOR]/LOOKUP_GAP+(N[NFACTOR]%LOOKUP_GAP>0));
|
||||
const uint xSIZE = CONCURRENT_THREADS;
|
||||
uint x = get_global_id(0)%xSIZE;
|
||||
|
||||
for(uint y=0; y<(NFACTOR/LOOKUP_GAP); ++y)
|
||||
for(uint y=0; y<(N[NFACTOR]/LOOKUP_GAP); ++y)
|
||||
{
|
||||
|
||||
for(uint z=0; z<zSIZE; ++z)
|
||||
@ -776,9 +801,9 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
|
||||
for(uint i=0; i<LOOKUP_GAP; ++i)
|
||||
salsa(X);
|
||||
}
|
||||
for (uint i=0; i<NFACTOR; ++i)
|
||||
for (uint i=0; i<N[NFACTOR]; ++i)
|
||||
{
|
||||
uint j = X[7].x & (NFACTOR-1);
|
||||
uint j = X[7].x & (N[NFACTOR]-1);
|
||||
uint y = (j/LOOKUP_GAP);
|
||||
|
||||
if (j&1)
|
||||
|
2
ocl.c
2
ocl.c
@ -592,7 +592,7 @@ build:
|
||||
char *CompilerOptions = (char *)calloc(1, 256);
|
||||
|
||||
sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d -D NFACTOR=%d",
|
||||
cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (int)clState->wsize,(unsigned int)nfactor);
|
||||
cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (int)clState->wsize, (unsigned int) opt_nfactor);
|
||||
|
||||
applog(LOG_DEBUG, "Setting worksize to %d", (int)(clState->wsize));
|
||||
if (clState->vwidth > 1)
|
||||
|
Loading…
x
Reference in New Issue
Block a user