Browse Source

kernel: use N-factor table instead of passing parameter N directly.

build-mingw
Noel Maersk 11 years ago
parent
commit
e18bcc2582
  1. 39
      kernel/alexkarnew.cl
  2. 39
      kernel/alexkarold.cl
  3. 41
      kernel/ckolivas.cl
  4. 41
      kernel/psw.cl
  5. 37
      kernel/zuikkis.cl
  6. 2
      ocl.c

39
kernel/alexkarnew.cl

@@ -28,9 +28,34 @@
* online backup system.
*/
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
/*
 * N (nfactor), CPU/Memory cost parameter.
 *
 * Lookup table from an nfactor exponent to the scrypt cost N: entry i holds
 * 2^i for i = 0..20 (21 entries). scrypt_core reads it as N[NFACTOR], so
 * NFACTOR is an index into this table, not the cost value itself.
 *
 * NOTE(review): nothing visible in this hunk range-checks NFACTOR; a value
 * above 20 would index past the end of the table -- confirm the host side
 * limits it.
 */
__constant uint N[] = {
0x00000001U, /* never used, padding */
0x00000002U,
0x00000004U,
0x00000008U,
0x00000010U,
0x00000020U,
0x00000040U,
0x00000080U,
0x00000100U,
0x00000200U,
0x00000400U, /* 2^10 == 1024, Litecoin scrypt default */
0x00000800U,
0x00001000U,
0x00002000U,
0x00004000U,
0x00008000U,
0x00010000U,
0x00020000U,
0x00040000U,
0x00080000U,
0x00100000U
};
/* Backwards compatibility, if NFACTOR not defined, default to 10 for scrypt */
#ifndef NFACTOR
#define NFACTOR 1024
#define NFACTOR 10
#endif
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
@@ -766,7 +791,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
uint CO=rotl(x,3U);
uint CO_tmp=rotl(xSIZE,3U);
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y, CO+=CO_tmp)
for(uint y=0; y<N[NFACTOR]/LOOKUP_GAP; ++y, CO+=CO_tmp)
{
uint CO_reg=CO;
#pragma unroll
@@ -780,20 +805,20 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
{
uint y = (NFACTOR/LOOKUP_GAP);
uint y = (N[NFACTOR]/LOOKUP_GAP);
CO=CO_tmp+rotl(y*xSIZE,3U);
#pragma unroll
for(uint z=0; z<zSIZE; ++z, ++CO)
lookup[CO] = X[z];
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
for(uint i=0; i<N[NFACTOR]%LOOKUP_GAP; ++i)
salsa(X);
}
#endif
for (uint i=0; i<NFACTOR; ++i)
for (uint i=0; i<N[NFACTOR]; ++i)
{
uint4 V[8];
uint j = X[7].x & (NFACTOR-1);
uint j = X[7].x & (N[NFACTOR]-1);
uint y = (j/LOOKUP_GAP);
uint CO_reg=CO_tmp+rotl(xSIZE*y,3U);

39
kernel/alexkarold.cl

@@ -28,9 +28,34 @@
* online backup system.
*/
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
/*
 * N (nfactor), CPU/Memory cost parameter.
 *
 * Lookup table from an nfactor exponent to the scrypt cost N: entry i holds
 * 2^i for i = 0..20 (21 entries). scrypt_core reads it as N[NFACTOR], so
 * NFACTOR is an index into this table, not the cost value itself.
 *
 * NOTE(review): nothing visible in this hunk range-checks NFACTOR; a value
 * above 20 would index past the end of the table -- confirm the host side
 * limits it.
 */
__constant uint N[] = {
0x00000001U, /* never used, padding */
0x00000002U,
0x00000004U,
0x00000008U,
0x00000010U,
0x00000020U,
0x00000040U,
0x00000080U,
0x00000100U,
0x00000200U,
0x00000400U, /* 2^10 == 1024, Litecoin scrypt default */
0x00000800U,
0x00001000U,
0x00002000U,
0x00004000U,
0x00008000U,
0x00010000U,
0x00020000U,
0x00040000U,
0x00080000U,
0x00100000U
};
/* Backwards compatibility, if NFACTOR not defined, default to 10 for scrypt */
#ifndef NFACTOR
#define NFACTOR 1024
#define NFACTOR 10
#endif
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
@@ -766,7 +791,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
uint CO_tmp=xSIZE<<3U;
uint CO_tmp2=x<<3U;
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y)
for(uint y=0; y<N[NFACTOR]/LOOKUP_GAP; ++y)
{
uint CO=y*CO_tmp+CO_tmp2;
#pragma unroll
@@ -778,19 +803,19 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
{
uint y = (NFACTOR/LOOKUP_GAP);
uint y = (N[NFACTOR]/LOOKUP_GAP);
uint CO=y*CO_tmp+CO_tmp2;
#pragma unroll
for(uint z=0; z<zSIZE; ++z)
lookup[CO] = X[z];
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
for(uint i=0; i<N[NFACTOR]%LOOKUP_GAP; ++i)
salsa(X);
}
#endif
for (uint i=0; i<NFACTOR; ++i)
for (uint i=0; i<N[NFACTOR]; ++i)
{
uint4 V[8];
uint j = X[7].x & (NFACTOR-1);
uint j = X[7].x & (N[NFACTOR]-1);
uint y = (j/LOOKUP_GAP);
uint CO=y*CO_tmp+CO_tmp2;
#pragma unroll

41
kernel/ckolivas.cl

@@ -28,9 +28,34 @@
* online backup system.
*/
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
/*
 * N (nfactor), CPU/Memory cost parameter.
 *
 * Lookup table from an nfactor exponent to the scrypt cost N: entry i holds
 * 2^i for i = 0..20 (21 entries). scrypt_core reads it as N[NFACTOR], so
 * NFACTOR is an index into this table, not the cost value itself.
 *
 * NOTE(review): nothing visible in this hunk range-checks NFACTOR; a value
 * above 20 would index past the end of the table -- confirm the host side
 * limits it.
 */
__constant uint N[] = {
0x00000001U, /* never used, padding */
0x00000002U,
0x00000004U,
0x00000008U,
0x00000010U,
0x00000020U,
0x00000040U,
0x00000080U,
0x00000100U,
0x00000200U,
0x00000400U, /* 2^10 == 1024, Litecoin scrypt default */
0x00000800U,
0x00001000U,
0x00002000U,
0x00004000U,
0x00008000U,
0x00010000U,
0x00020000U,
0x00040000U,
0x00080000U,
0x00100000U
};
/* Backwards compatibility, if NFACTOR not defined, default to 10 for scrypt */
#ifndef NFACTOR
#define NFACTOR 1024
#define NFACTOR 10
#endif
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
@@ -764,11 +789,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
{
shittify(X);
const uint zSIZE = 8;
const uint ySIZE = (NFACTOR/LOOKUP_GAP+(NFACTOR%LOOKUP_GAP>0));
const uint ySIZE = (N[NFACTOR]/LOOKUP_GAP+(N[NFACTOR]%LOOKUP_GAP>0));
const uint xSIZE = CONCURRENT_THREADS;
uint x = get_global_id(0)%xSIZE;
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y)
for(uint y=0; y<N[NFACTOR]/LOOKUP_GAP; ++y)
{
#pragma unroll
for(uint z=0; z<zSIZE; ++z)
@@ -778,18 +803,18 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
}
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
{
uint y = (NFACTOR/LOOKUP_GAP);
uint y = (N[NFACTOR]/LOOKUP_GAP);
#pragma unroll
for(uint z=0; z<zSIZE; ++z)
lookup[CO] = X[z];
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
for(uint i=0; i<N[NFACTOR]%LOOKUP_GAP; ++i)
salsa(X);
}
#endif
for (uint i=0; i<NFACTOR; ++i)
for (uint i=0; i<N[NFACTOR]; ++i)
{
uint4 V[8];
uint j = X[7].x & (NFACTOR-1);
uint j = X[7].x & (N[NFACTOR]-1);
uint y = (j/LOOKUP_GAP);
#pragma unroll
for(uint z=0; z<zSIZE; ++z)

41
kernel/psw.cl

@@ -29,9 +29,34 @@
* online backup system.
*/
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
/*
 * N (nfactor), CPU/Memory cost parameter.
 *
 * Lookup table from an nfactor exponent to the scrypt cost N: entry i holds
 * 2^i for i = 0..20 (21 entries). scrypt_core reads it as N[NFACTOR], so
 * NFACTOR is an index into this table, not the cost value itself.
 *
 * NOTE(review): nothing visible in this hunk range-checks NFACTOR; a value
 * above 20 would index past the end of the table -- confirm the host side
 * limits it.
 */
__constant uint N[] = {
0x00000001U, /* never used, padding */
0x00000002U,
0x00000004U,
0x00000008U,
0x00000010U,
0x00000020U,
0x00000040U,
0x00000080U,
0x00000100U,
0x00000200U,
0x00000400U, /* 2^10 == 1024, Litecoin scrypt default */
0x00000800U,
0x00001000U,
0x00002000U,
0x00004000U,
0x00008000U,
0x00010000U,
0x00020000U,
0x00040000U,
0x00080000U,
0x00100000U
};
/* Backwards compatibility, if NFACTOR not defined, default to 10 for scrypt */
#ifndef NFACTOR
#define NFACTOR 1024
#define NFACTOR 10
#endif
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
@@ -703,11 +728,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
{
shittify(X);
const uint zSIZE = 8;
const uint ySIZE = (NFACTOR/LOOKUP_GAP+(NFACTOR%LOOKUP_GAP>0));
const uint ySIZE = (N[NFACTOR]/LOOKUP_GAP+(N[NFACTOR]%LOOKUP_GAP>0));
const uint xSIZE = CONCURRENT_THREADS;
uint x = get_global_id(0)%xSIZE;
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y)
for(uint y=0; y<N[NFACTOR]/LOOKUP_GAP; ++y)
{
#pragma unroll
for(uint z=0; z<zSIZE; ++z)
@@ -717,18 +742,18 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
}
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
{
uint y = (NFACTOR/LOOKUP_GAP);
uint y = (N[NFACTOR]/LOOKUP_GAP);
#pragma unroll
for(uint z=0; z<zSIZE; ++z)
lookup[CO] = X[z];
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i)
for(uint i=0; i<N[NFACTOR]%LOOKUP_GAP; ++i)
salsa(X);
}
#endif
for (uint i=0; i<NFACTOR; ++i)
for (uint i=0; i<N[NFACTOR]; ++i)
{
uint4 V[8];
uint j = X[7].x & (NFACTOR-1);
uint j = X[7].x & (N[NFACTOR]-1);
uint y = (j/LOOKUP_GAP);
#pragma unroll
for(uint z=0; z<zSIZE; ++z)

37
kernel/zuikkis.cl

@@ -28,9 +28,34 @@
* online backup system.
*/
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */
/*
 * N (nfactor), CPU/Memory cost parameter.
 *
 * Lookup table from an nfactor exponent to the scrypt cost N: entry i holds
 * 2^i for i = 0..20 (21 entries). scrypt_core reads it as N[NFACTOR], so
 * NFACTOR is an index into this table, not the cost value itself.
 *
 * NOTE(review): nothing visible in this hunk range-checks NFACTOR; a value
 * above 20 would index past the end of the table -- confirm the host side
 * limits it.
 */
__constant uint N[] = {
0x00000001U, /* never used, padding */
0x00000002U,
0x00000004U,
0x00000008U,
0x00000010U,
0x00000020U,
0x00000040U,
0x00000080U,
0x00000100U,
0x00000200U,
0x00000400U, /* 2^10 == 1024, Litecoin scrypt default */
0x00000800U,
0x00001000U,
0x00002000U,
0x00004000U,
0x00008000U,
0x00010000U,
0x00020000U,
0x00040000U,
0x00080000U,
0x00100000U
};
/* Backwards compatibility, if NFACTOR not defined, default to 10 for scrypt */
#ifndef NFACTOR
#define NFACTOR 1024
#define NFACTOR 10
#endif
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
@@ -764,11 +789,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
{
shittify(X);
const uint zSIZE = 8;
const uint ySIZE = (NFACTOR/LOOKUP_GAP+(NFACTOR%LOOKUP_GAP>0));
const uint ySIZE = (N[NFACTOR]/LOOKUP_GAP+(N[NFACTOR]%LOOKUP_GAP>0));
const uint xSIZE = CONCURRENT_THREADS;
uint x = get_global_id(0)%xSIZE;
for(uint y=0; y<(NFACTOR/LOOKUP_GAP); ++y)
for(uint y=0; y<(N[NFACTOR]/LOOKUP_GAP); ++y)
{
for(uint z=0; z<zSIZE; ++z)
@@ -776,9 +801,9 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
for(uint i=0; i<LOOKUP_GAP; ++i)
salsa(X);
}
for (uint i=0; i<NFACTOR; ++i)
for (uint i=0; i<N[NFACTOR]; ++i)
{
uint j = X[7].x & (NFACTOR-1);
uint j = X[7].x & (N[NFACTOR]-1);
uint y = (j/LOOKUP_GAP);
if (j&1)

2
ocl.c

@@ -592,7 +592,7 @@ build:
char *CompilerOptions = (char *)calloc(1, 256);
sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d -D NFACTOR=%d",
cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (int)clState->wsize,(unsigned int)nfactor);
cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (int)clState->wsize, (unsigned int) opt_nfactor);
applog(LOG_DEBUG, "Setting worksize to %d", (int)(clState->wsize));
if (clState->vwidth > 1)

Loading…
Cancel
Save