Browse Source

kernel: use N-factor table instead of passing parameter N directly.

build-mingw
Noel Maersk 11 years ago
parent
commit
e18bcc2582
  1. 39
      kernel/alexkarnew.cl
  2. 39
      kernel/alexkarold.cl
  3. 41
      kernel/ckolivas.cl
  4. 41
      kernel/psw.cl
  5. 37
      kernel/zuikkis.cl
  6. 2
      ocl.c

39
kernel/alexkarnew.cl

@ -28,9 +28,34 @@
* online backup system. * online backup system.
*/ */
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */ /* N (nfactor), CPU/Memory cost parameter */
/*
 * Lookup table from the NFACTOR exponent to the scrypt N parameter:
 * entry i holds 2^i for i = 0..20 (21 entries). scrypt_core reads
 * N[NFACTOR] for its loop bounds and, as N[NFACTOR]-1, for the index mask.
 * NOTE(review): nothing visible here range-checks NFACTOR; a value above 20
 * would index past the end of this table -- confirm the host validates it.
 */
__constant uint N[] = {
0x00000001U, /* never used, padding */
0x00000002U,
0x00000004U,
0x00000008U,
0x00000010U,
0x00000020U,
0x00000040U,
0x00000080U,
0x00000100U,
0x00000200U,
0x00000400U, /* 2^10 == 1024, Litecoin scrypt default */
0x00000800U,
0x00001000U,
0x00002000U,
0x00004000U,
0x00008000U,
0x00010000U,
0x00020000U,
0x00040000U,
0x00080000U,
0x00100000U
};
/* Backwards compatibility, if NFACTOR not defined, default to 10 for scrypt */
#ifndef NFACTOR #ifndef NFACTOR
#define NFACTOR 1024 #define NFACTOR 10
#endif #endif
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 }; __constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
@ -766,7 +791,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
uint CO=rotl(x,3U); uint CO=rotl(x,3U);
uint CO_tmp=rotl(xSIZE,3U); uint CO_tmp=rotl(xSIZE,3U);
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y, CO+=CO_tmp) for(uint y=0; y<N[NFACTOR]/LOOKUP_GAP; ++y, CO+=CO_tmp)
{ {
uint CO_reg=CO; uint CO_reg=CO;
#pragma unroll #pragma unroll
@ -780,20 +805,20 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8) #if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
{ {
uint y = (NFACTOR/LOOKUP_GAP); uint y = (N[NFACTOR]/LOOKUP_GAP);
CO=CO_tmp+rotl(y*xSIZE,3U); CO=CO_tmp+rotl(y*xSIZE,3U);
#pragma unroll #pragma unroll
for(uint z=0; z<zSIZE; ++z, ++CO) for(uint z=0; z<zSIZE; ++z, ++CO)
lookup[CO] = X[z]; lookup[CO] = X[z];
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i) for(uint i=0; i<N[NFACTOR]%LOOKUP_GAP; ++i)
salsa(X); salsa(X);
} }
#endif #endif
for (uint i=0; i<NFACTOR; ++i) for (uint i=0; i<N[NFACTOR]; ++i)
{ {
uint4 V[8]; uint4 V[8];
uint j = X[7].x & (NFACTOR-1); uint j = X[7].x & (N[NFACTOR]-1);
uint y = (j/LOOKUP_GAP); uint y = (j/LOOKUP_GAP);
uint CO_reg=CO_tmp+rotl(xSIZE*y,3U); uint CO_reg=CO_tmp+rotl(xSIZE*y,3U);

39
kernel/alexkarold.cl

@ -28,9 +28,34 @@
* online backup system. * online backup system.
*/ */
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */ /* N (nfactor), CPU/Memory cost parameter */
/*
 * Lookup table from the NFACTOR exponent to the scrypt N parameter:
 * entry i holds 2^i for i = 0..20 (21 entries). scrypt_core reads
 * N[NFACTOR] for its loop bounds and, as N[NFACTOR]-1, for the index mask.
 * NOTE(review): nothing visible here range-checks NFACTOR; a value above 20
 * would index past the end of this table -- confirm the host validates it.
 */
__constant uint N[] = {
0x00000001U, /* never used, padding */
0x00000002U,
0x00000004U,
0x00000008U,
0x00000010U,
0x00000020U,
0x00000040U,
0x00000080U,
0x00000100U,
0x00000200U,
0x00000400U, /* 2^10 == 1024, Litecoin scrypt default */
0x00000800U,
0x00001000U,
0x00002000U,
0x00004000U,
0x00008000U,
0x00010000U,
0x00020000U,
0x00040000U,
0x00080000U,
0x00100000U
};
/* Backwards compatibility, if NFACTOR not defined, default to 10 for scrypt */
#ifndef NFACTOR #ifndef NFACTOR
#define NFACTOR 1024 #define NFACTOR 10
#endif #endif
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 }; __constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
@ -766,7 +791,7 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
uint CO_tmp=xSIZE<<3U; uint CO_tmp=xSIZE<<3U;
uint CO_tmp2=x<<3U; uint CO_tmp2=x<<3U;
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y) for(uint y=0; y<N[NFACTOR]/LOOKUP_GAP; ++y)
{ {
uint CO=y*CO_tmp+CO_tmp2; uint CO=y*CO_tmp+CO_tmp2;
#pragma unroll #pragma unroll
@ -778,19 +803,19 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8) #if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
{ {
uint y = (NFACTOR/LOOKUP_GAP); uint y = (N[NFACTOR]/LOOKUP_GAP);
uint CO=y*CO_tmp+CO_tmp2; uint CO=y*CO_tmp+CO_tmp2;
#pragma unroll #pragma unroll
for(uint z=0; z<zSIZE; ++z) for(uint z=0; z<zSIZE; ++z)
lookup[CO] = X[z]; lookup[CO] = X[z];
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i) for(uint i=0; i<N[NFACTOR]%LOOKUP_GAP; ++i)
salsa(X); salsa(X);
} }
#endif #endif
for (uint i=0; i<NFACTOR; ++i) for (uint i=0; i<N[NFACTOR]; ++i)
{ {
uint4 V[8]; uint4 V[8];
uint j = X[7].x & (NFACTOR-1); uint j = X[7].x & (N[NFACTOR]-1);
uint y = (j/LOOKUP_GAP); uint y = (j/LOOKUP_GAP);
uint CO=y*CO_tmp+CO_tmp2; uint CO=y*CO_tmp+CO_tmp2;
#pragma unroll #pragma unroll

41
kernel/ckolivas.cl

@ -28,9 +28,34 @@
* online backup system. * online backup system.
*/ */
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */ /* N (nfactor), CPU/Memory cost parameter */
/*
 * Lookup table from the NFACTOR exponent to the scrypt N parameter:
 * entry i holds 2^i for i = 0..20 (21 entries). scrypt_core reads
 * N[NFACTOR] for its loop bounds and, as N[NFACTOR]-1, for the index mask.
 * NOTE(review): nothing visible here range-checks NFACTOR; a value above 20
 * would index past the end of this table -- confirm the host validates it.
 */
__constant uint N[] = {
0x00000001U, /* never used, padding */
0x00000002U,
0x00000004U,
0x00000008U,
0x00000010U,
0x00000020U,
0x00000040U,
0x00000080U,
0x00000100U,
0x00000200U,
0x00000400U, /* 2^10 == 1024, Litecoin scrypt default */
0x00000800U,
0x00001000U,
0x00002000U,
0x00004000U,
0x00008000U,
0x00010000U,
0x00020000U,
0x00040000U,
0x00080000U,
0x00100000U
};
/* Backwards compatibility, if NFACTOR not defined, default to 10 for scrypt */
#ifndef NFACTOR #ifndef NFACTOR
#define NFACTOR 1024 #define NFACTOR 10
#endif #endif
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 }; __constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
@ -764,11 +789,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
{ {
shittify(X); shittify(X);
const uint zSIZE = 8; const uint zSIZE = 8;
const uint ySIZE = (NFACTOR/LOOKUP_GAP+(NFACTOR%LOOKUP_GAP>0)); const uint ySIZE = (N[NFACTOR]/LOOKUP_GAP+(N[NFACTOR]%LOOKUP_GAP>0));
const uint xSIZE = CONCURRENT_THREADS; const uint xSIZE = CONCURRENT_THREADS;
uint x = get_global_id(0)%xSIZE; uint x = get_global_id(0)%xSIZE;
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y) for(uint y=0; y<N[NFACTOR]/LOOKUP_GAP; ++y)
{ {
#pragma unroll #pragma unroll
for(uint z=0; z<zSIZE; ++z) for(uint z=0; z<zSIZE; ++z)
@ -778,18 +803,18 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
} }
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8) #if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
{ {
uint y = (NFACTOR/LOOKUP_GAP); uint y = (N[NFACTOR]/LOOKUP_GAP);
#pragma unroll #pragma unroll
for(uint z=0; z<zSIZE; ++z) for(uint z=0; z<zSIZE; ++z)
lookup[CO] = X[z]; lookup[CO] = X[z];
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i) for(uint i=0; i<N[NFACTOR]%LOOKUP_GAP; ++i)
salsa(X); salsa(X);
} }
#endif #endif
for (uint i=0; i<NFACTOR; ++i) for (uint i=0; i<N[NFACTOR]; ++i)
{ {
uint4 V[8]; uint4 V[8];
uint j = X[7].x & (NFACTOR-1); uint j = X[7].x & (N[NFACTOR]-1);
uint y = (j/LOOKUP_GAP); uint y = (j/LOOKUP_GAP);
#pragma unroll #pragma unroll
for(uint z=0; z<zSIZE; ++z) for(uint z=0; z<zSIZE; ++z)

41
kernel/psw.cl

@ -29,9 +29,34 @@
* online backup system. * online backup system.
*/ */
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */ /* N (nfactor), CPU/Memory cost parameter */
/*
 * Lookup table from the NFACTOR exponent to the scrypt N parameter:
 * entry i holds 2^i for i = 0..20 (21 entries). scrypt_core reads
 * N[NFACTOR] for its loop bounds and, as N[NFACTOR]-1, for the index mask.
 * NOTE(review): nothing visible here range-checks NFACTOR; a value above 20
 * would index past the end of this table -- confirm the host validates it.
 */
__constant uint N[] = {
0x00000001U, /* never used, padding */
0x00000002U,
0x00000004U,
0x00000008U,
0x00000010U,
0x00000020U,
0x00000040U,
0x00000080U,
0x00000100U,
0x00000200U,
0x00000400U, /* 2^10 == 1024, Litecoin scrypt default */
0x00000800U,
0x00001000U,
0x00002000U,
0x00004000U,
0x00008000U,
0x00010000U,
0x00020000U,
0x00040000U,
0x00080000U,
0x00100000U
};
/* Backwards compatibility, if NFACTOR not defined, default to 10 for scrypt */
#ifndef NFACTOR #ifndef NFACTOR
#define NFACTOR 1024 #define NFACTOR 10
#endif #endif
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 }; __constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
@ -703,11 +728,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
{ {
shittify(X); shittify(X);
const uint zSIZE = 8; const uint zSIZE = 8;
const uint ySIZE = (NFACTOR/LOOKUP_GAP+(NFACTOR%LOOKUP_GAP>0)); const uint ySIZE = (N[NFACTOR]/LOOKUP_GAP+(N[NFACTOR]%LOOKUP_GAP>0));
const uint xSIZE = CONCURRENT_THREADS; const uint xSIZE = CONCURRENT_THREADS;
uint x = get_global_id(0)%xSIZE; uint x = get_global_id(0)%xSIZE;
for(uint y=0; y<NFACTOR/LOOKUP_GAP; ++y) for(uint y=0; y<N[NFACTOR]/LOOKUP_GAP; ++y)
{ {
#pragma unroll #pragma unroll
for(uint z=0; z<zSIZE; ++z) for(uint z=0; z<zSIZE; ++z)
@ -717,18 +742,18 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
} }
#if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8) #if (LOOKUP_GAP != 1) && (LOOKUP_GAP != 2) && (LOOKUP_GAP != 4) && (LOOKUP_GAP != 8)
{ {
uint y = (NFACTOR/LOOKUP_GAP); uint y = (N[NFACTOR]/LOOKUP_GAP);
#pragma unroll #pragma unroll
for(uint z=0; z<zSIZE; ++z) for(uint z=0; z<zSIZE; ++z)
lookup[CO] = X[z]; lookup[CO] = X[z];
for(uint i=0; i<NFACTOR%LOOKUP_GAP; ++i) for(uint i=0; i<N[NFACTOR]%LOOKUP_GAP; ++i)
salsa(X); salsa(X);
} }
#endif #endif
for (uint i=0; i<NFACTOR; ++i) for (uint i=0; i<N[NFACTOR]; ++i)
{ {
uint4 V[8]; uint4 V[8];
uint j = X[7].x & (NFACTOR-1); uint j = X[7].x & (N[NFACTOR]-1);
uint y = (j/LOOKUP_GAP); uint y = (j/LOOKUP_GAP);
#pragma unroll #pragma unroll
for(uint z=0; z<zSIZE; ++z) for(uint z=0; z<zSIZE; ++z)

37
kernel/zuikkis.cl

@ -28,9 +28,34 @@
* online backup system. * online backup system.
*/ */
/* Backwards compatibility, if NFACTOR not defined, default to 1024 scrypt */ /* N (nfactor), CPU/Memory cost parameter */
/*
 * Lookup table from the NFACTOR exponent to the scrypt N parameter:
 * entry i holds 2^i for i = 0..20 (21 entries). scrypt_core reads
 * N[NFACTOR] for its loop bounds and, as N[NFACTOR]-1, for the index mask.
 * NOTE(review): nothing visible here range-checks NFACTOR; a value above 20
 * would index past the end of this table -- confirm the host validates it.
 */
__constant uint N[] = {
0x00000001U, /* never used, padding */
0x00000002U,
0x00000004U,
0x00000008U,
0x00000010U,
0x00000020U,
0x00000040U,
0x00000080U,
0x00000100U,
0x00000200U,
0x00000400U, /* 2^10 == 1024, Litecoin scrypt default */
0x00000800U,
0x00001000U,
0x00002000U,
0x00004000U,
0x00008000U,
0x00010000U,
0x00020000U,
0x00040000U,
0x00080000U,
0x00100000U
};
/* Backwards compatibility, if NFACTOR not defined, default to 10 for scrypt */
#ifndef NFACTOR #ifndef NFACTOR
#define NFACTOR 1024 #define NFACTOR 10
#endif #endif
__constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 }; __constant uint ES[2] = { 0x00FF00FF, 0xFF00FF00 };
@ -764,11 +789,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
{ {
shittify(X); shittify(X);
const uint zSIZE = 8; const uint zSIZE = 8;
const uint ySIZE = (NFACTOR/LOOKUP_GAP+(NFACTOR%LOOKUP_GAP>0)); const uint ySIZE = (N[NFACTOR]/LOOKUP_GAP+(N[NFACTOR]%LOOKUP_GAP>0));
const uint xSIZE = CONCURRENT_THREADS; const uint xSIZE = CONCURRENT_THREADS;
uint x = get_global_id(0)%xSIZE; uint x = get_global_id(0)%xSIZE;
for(uint y=0; y<(NFACTOR/LOOKUP_GAP); ++y) for(uint y=0; y<(N[NFACTOR]/LOOKUP_GAP); ++y)
{ {
for(uint z=0; z<zSIZE; ++z) for(uint z=0; z<zSIZE; ++z)
@ -776,9 +801,9 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup)
for(uint i=0; i<LOOKUP_GAP; ++i) for(uint i=0; i<LOOKUP_GAP; ++i)
salsa(X); salsa(X);
} }
for (uint i=0; i<NFACTOR; ++i) for (uint i=0; i<N[NFACTOR]; ++i)
{ {
uint j = X[7].x & (NFACTOR-1); uint j = X[7].x & (N[NFACTOR]-1);
uint y = (j/LOOKUP_GAP); uint y = (j/LOOKUP_GAP);
if (j&1) if (j&1)

2
ocl.c

@ -592,7 +592,7 @@ build:
char *CompilerOptions = (char *)calloc(1, 256); char *CompilerOptions = (char *)calloc(1, 256);
sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d -D NFACTOR=%d", sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d -D NFACTOR=%d",
cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (int)clState->wsize,(unsigned int)nfactor); cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (int)clState->wsize, (unsigned int) opt_nfactor);
applog(LOG_DEBUG, "Setting worksize to %d", (int)(clState->wsize)); applog(LOG_DEBUG, "Setting worksize to %d", (int)(clState->wsize));
if (clState->vwidth > 1) if (clState->vwidth > 1)

Loading…
Cancel
Save