Christian Buchner
11 years ago
21 changed files with 934 additions and 261 deletions
@ -1,200 +1,190 @@
@@ -1,200 +1,190 @@
|
||||
/* cpuminer-config.h. Generated from cpuminer-config.h.in by configure. */ |
||||
/* cpuminer-config.h.in. Generated from configure.ac by autoheader. */ |
||||
|
||||
/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
|
||||
systems. This function is required for `alloca.c' support on those systems. |
||||
*/ |
||||
/* #undef CRAY_STACKSEG_END */ |
||||
|
||||
/* Define to 1 if using `alloca.c'. */ |
||||
/* #undef C_ALLOCA */ |
||||
|
||||
/* Define to 1 if you have `alloca', as a function or macro. */ |
||||
#define HAVE_ALLOCA 1 |
||||
|
||||
/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
|
||||
*/ |
||||
#define HAVE_ALLOCA_H 1 |
||||
|
||||
/* Define to 1 if you have the declaration of `be32dec', and to 0 if you
|
||||
don't. */ |
||||
#define HAVE_DECL_BE32DEC 0 |
||||
|
||||
/* Define to 1 if you have the declaration of `be32enc', and to 0 if you
|
||||
don't. */ |
||||
#define HAVE_DECL_BE32ENC 0 |
||||
|
||||
/* Define to 1 if you have the declaration of `le32dec', and to 0 if you
|
||||
don't. */ |
||||
#define HAVE_DECL_LE32DEC 0 |
||||
|
||||
/* Define to 1 if you have the declaration of `le32enc', and to 0 if you
|
||||
don't. */ |
||||
#define HAVE_DECL_LE32ENC 0 |
||||
|
||||
/* Define to 1 if you have the `getopt_long' function. */ |
||||
#define HAVE_GETOPT_LONG 1 |
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */ |
||||
#define HAVE_INTTYPES_H 1 |
||||
|
||||
/* Define to 1 if you have the `crypto' library (-lcrypto). */ |
||||
#define HAVE_LIBCRYPTO 1 |
||||
|
||||
/* Define to 1 if you have a functional curl library. */ |
||||
#define HAVE_LIBCURL 1 |
||||
|
||||
/* Define to 1 if you have the `ssl' library (-lssl). */ |
||||
#define HAVE_LIBSSL 1 |
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */ |
||||
#define HAVE_MEMORY_H 1 |
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */ |
||||
#define HAVE_STDINT_H 1 |
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */ |
||||
#define HAVE_STDLIB_H 1 |
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */ |
||||
#define HAVE_STRINGS_H 1 |
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */ |
||||
#define HAVE_STRING_H 1 |
||||
|
||||
/* Define to 1 if you have the <syslog.h> header file. */ |
||||
#define HAVE_SYSLOG_H 1 |
||||
|
||||
/* Define to 1 if you have the <sys/endian.h> header file. */ |
||||
/* #undef HAVE_SYS_ENDIAN_H */ |
||||
|
||||
/* Define to 1 if you have the <sys/param.h> header file. */ |
||||
#define HAVE_SYS_PARAM_H 1 |
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */ |
||||
#define HAVE_SYS_STAT_H 1 |
||||
|
||||
/* Define to 1 if you have the <sys/sysctl.h> header file. */ |
||||
#define HAVE_SYS_SYSCTL_H 1 |
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */ |
||||
#define HAVE_SYS_TYPES_H 1 |
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */ |
||||
#define HAVE_UNISTD_H 1 |
||||
|
||||
/* Defined if libcurl supports AsynchDNS */ |
||||
/* #undef LIBCURL_FEATURE_ASYNCHDNS */ |
||||
|
||||
/* Defined if libcurl supports IDN */ |
||||
#define LIBCURL_FEATURE_IDN 1 |
||||
|
||||
/* Defined if libcurl supports IPv6 */ |
||||
#define LIBCURL_FEATURE_IPV6 1 |
||||
|
||||
/* Defined if libcurl supports KRB4 */ |
||||
/* #undef LIBCURL_FEATURE_KRB4 */ |
||||
|
||||
/* Defined if libcurl supports libz */ |
||||
#define LIBCURL_FEATURE_LIBZ 1 |
||||
|
||||
/* Defined if libcurl supports NTLM */ |
||||
#define LIBCURL_FEATURE_NTLM 1 |
||||
|
||||
/* Defined if libcurl supports SSL */ |
||||
#define LIBCURL_FEATURE_SSL 1 |
||||
|
||||
/* Defined if libcurl supports SSPI */ |
||||
/* #undef LIBCURL_FEATURE_SSPI */ |
||||
|
||||
/* Defined if libcurl supports DICT */ |
||||
#define LIBCURL_PROTOCOL_DICT 1 |
||||
|
||||
/* Defined if libcurl supports FILE */ |
||||
#define LIBCURL_PROTOCOL_FILE 1 |
||||
|
||||
/* Defined if libcurl supports FTP */ |
||||
#define LIBCURL_PROTOCOL_FTP 1 |
||||
|
||||
/* Defined if libcurl supports FTPS */ |
||||
#define LIBCURL_PROTOCOL_FTPS 1 |
||||
|
||||
/* Defined if libcurl supports HTTP */ |
||||
#define LIBCURL_PROTOCOL_HTTP 1 |
||||
|
||||
/* Defined if libcurl supports HTTPS */ |
||||
#define LIBCURL_PROTOCOL_HTTPS 1 |
||||
|
||||
/* Defined if libcurl supports IMAP */ |
||||
#define LIBCURL_PROTOCOL_IMAP 1 |
||||
|
||||
/* Defined if libcurl supports LDAP */ |
||||
#define LIBCURL_PROTOCOL_LDAP 1 |
||||
|
||||
/* Defined if libcurl supports POP3 */ |
||||
#define LIBCURL_PROTOCOL_POP3 1 |
||||
|
||||
/* Defined if libcurl supports RTSP */ |
||||
#define LIBCURL_PROTOCOL_RTSP 1 |
||||
|
||||
/* Defined if libcurl supports SMTP */ |
||||
#define LIBCURL_PROTOCOL_SMTP 1 |
||||
|
||||
/* Defined if libcurl supports TELNET */ |
||||
#define LIBCURL_PROTOCOL_TELNET 1 |
||||
|
||||
/* Defined if libcurl supports TFTP */ |
||||
#define LIBCURL_PROTOCOL_TFTP 1 |
||||
|
||||
/* Define to 1 if your C compiler doesn't accept -c and -o together. */ |
||||
/* #undef NO_MINUS_C_MINUS_O */ |
||||
|
||||
/* Name of package */ |
||||
#define PACKAGE "ccminer" |
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */ |
||||
#define PACKAGE_BUGREPORT "" |
||||
|
||||
/* Define to the full name of this package. */ |
||||
#define PACKAGE_NAME "ccminer" |
||||
|
||||
/* Define to the full name and version of this package. */ |
||||
#define PACKAGE_STRING "ccminer 2014.03.21" |
||||
|
||||
/* Define to the one symbol short name of this package. */ |
||||
#define PACKAGE_TARNAME "ccminer" |
||||
|
||||
/* Define to the home page for this package. */ |
||||
#define PACKAGE_URL "" |
||||
|
||||
/* Define to the version of this package. */ |
||||
#define PACKAGE_VERSION "2014.03.21" |
||||
|
||||
/* If using the C implementation of alloca, define if you know the
|
||||
direction of stack growth for your system; otherwise it will be |
||||
automatically deduced at runtime. |
||||
STACK_DIRECTION > 0 => grows toward higher addresses |
||||
STACK_DIRECTION < 0 => grows toward lower addresses |
||||
STACK_DIRECTION = 0 => direction of growth unknown */ |
||||
/* #undef STACK_DIRECTION */ |
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */ |
||||
#define STDC_HEADERS 1 |
||||
|
||||
/* Define to 1 if AVX assembly is available. */ |
||||
#define USE_AVX 1 |
||||
|
||||
/* Define to 1 if AVX2 assembly is available. */ |
||||
#define USE_AVX2 1 |
||||
|
||||
/* Define to 1 if XOP assembly is available. */ |
||||
#define USE_XOP 1 |
||||
|
||||
/* Version number of package */ |
||||
#define VERSION "2014.03.21" |
||||
|
||||
/* Define curl_free() as free() if our version of curl lacks curl_free. */ |
||||
/* #undef curl_free */ |
||||
|
||||
/* Define to `unsigned int' if <sys/types.h> does not define. */ |
||||
/* #undef size_t */ |
||||
/* cpuminer-config.h.in. Generated from configure.ac by autoheader. */ |
||||
|
||||
/* Define to one of `_getb67', `GETB67', `getb67' for Cray-2 and Cray-YMP
|
||||
systems. This function is required for `alloca.c' support on those systems. |
||||
*/ |
||||
#undef CRAY_STACKSEG_END |
||||
|
||||
/* Define to 1 if using `alloca.c'. */ |
||||
#undef C_ALLOCA |
||||
|
||||
/* Define to 1 if you have `alloca', as a function or macro. */ |
||||
#undef HAVE_ALLOCA |
||||
|
||||
/* Define to 1 if you have <alloca.h> and it should be used (not on Ultrix).
|
||||
*/ |
||||
#undef HAVE_ALLOCA_H |
||||
|
||||
/* Define to 1 if you have the declaration of `be32dec', and to 0 if you
|
||||
don't. */ |
||||
#undef HAVE_DECL_BE32DEC |
||||
|
||||
/* Define to 1 if you have the declaration of `be32enc', and to 0 if you
|
||||
don't. */ |
||||
#undef HAVE_DECL_BE32ENC |
||||
|
||||
/* Define to 1 if you have the declaration of `le32dec', and to 0 if you
|
||||
don't. */ |
||||
#undef HAVE_DECL_LE32DEC |
||||
|
||||
/* Define to 1 if you have the declaration of `le32enc', and to 0 if you
|
||||
don't. */ |
||||
#undef HAVE_DECL_LE32ENC |
||||
|
||||
/* Define to 1 if you have the `getopt_long' function. */ |
||||
#define HAVE_GETOPT_LONG 1 |
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */ |
||||
#undef HAVE_INTTYPES_H |
||||
|
||||
/* Define to 1 if you have a functional curl library. */ |
||||
#undef HAVE_LIBCURL |
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */ |
||||
#undef HAVE_MEMORY_H |
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */ |
||||
#undef HAVE_STDINT_H |
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */ |
||||
#undef HAVE_STDLIB_H |
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */ |
||||
#undef HAVE_STRINGS_H |
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */ |
||||
#undef HAVE_STRING_H |
||||
|
||||
/* Define to 1 if you have the <syslog.h> header file. */ |
||||
#undef HAVE_SYSLOG_H |
||||
|
||||
/* Define to 1 if you have the <sys/endian.h> header file. */ |
||||
#undef HAVE_SYS_ENDIAN_H |
||||
|
||||
/* Define to 1 if you have the <sys/param.h> header file. */ |
||||
#undef HAVE_SYS_PARAM_H |
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */ |
||||
#undef HAVE_SYS_STAT_H |
||||
|
||||
/* Define to 1 if you have the <sys/sysctl.h> header file. */ |
||||
#undef HAVE_SYS_SYSCTL_H |
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */ |
||||
#undef HAVE_SYS_TYPES_H |
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */ |
||||
#undef HAVE_UNISTD_H |
||||
|
||||
/* Defined if libcurl supports AsynchDNS */ |
||||
#undef LIBCURL_FEATURE_ASYNCHDNS |
||||
|
||||
/* Defined if libcurl supports IDN */ |
||||
#undef LIBCURL_FEATURE_IDN |
||||
|
||||
/* Defined if libcurl supports IPv6 */ |
||||
#undef LIBCURL_FEATURE_IPV6 |
||||
|
||||
/* Defined if libcurl supports KRB4 */ |
||||
#undef LIBCURL_FEATURE_KRB4 |
||||
|
||||
/* Defined if libcurl supports libz */ |
||||
#undef LIBCURL_FEATURE_LIBZ |
||||
|
||||
/* Defined if libcurl supports NTLM */ |
||||
#undef LIBCURL_FEATURE_NTLM |
||||
|
||||
/* Defined if libcurl supports SSL */ |
||||
#undef LIBCURL_FEATURE_SSL |
||||
|
||||
/* Defined if libcurl supports SSPI */ |
||||
#undef LIBCURL_FEATURE_SSPI |
||||
|
||||
/* Defined if libcurl supports DICT */ |
||||
#undef LIBCURL_PROTOCOL_DICT |
||||
|
||||
/* Defined if libcurl supports FILE */ |
||||
#undef LIBCURL_PROTOCOL_FILE |
||||
|
||||
/* Defined if libcurl supports FTP */ |
||||
#undef LIBCURL_PROTOCOL_FTP |
||||
|
||||
/* Defined if libcurl supports FTPS */ |
||||
#undef LIBCURL_PROTOCOL_FTPS |
||||
|
||||
/* Defined if libcurl supports HTTP */ |
||||
#undef LIBCURL_PROTOCOL_HTTP |
||||
|
||||
/* Defined if libcurl supports HTTPS */ |
||||
#undef LIBCURL_PROTOCOL_HTTPS |
||||
|
||||
/* Defined if libcurl supports IMAP */ |
||||
#undef LIBCURL_PROTOCOL_IMAP |
||||
|
||||
/* Defined if libcurl supports LDAP */ |
||||
#undef LIBCURL_PROTOCOL_LDAP |
||||
|
||||
/* Defined if libcurl supports POP3 */ |
||||
#undef LIBCURL_PROTOCOL_POP3 |
||||
|
||||
/* Defined if libcurl supports RTSP */ |
||||
#undef LIBCURL_PROTOCOL_RTSP |
||||
|
||||
/* Defined if libcurl supports SMTP */ |
||||
#undef LIBCURL_PROTOCOL_SMTP |
||||
|
||||
/* Defined if libcurl supports TELNET */ |
||||
#undef LIBCURL_PROTOCOL_TELNET |
||||
|
||||
/* Defined if libcurl supports TFTP */ |
||||
#undef LIBCURL_PROTOCOL_TFTP |
||||
|
||||
/* Define to 1 if your C compiler doesn't accept -c and -o together. */ |
||||
#undef NO_MINUS_C_MINUS_O |
||||
|
||||
/* Name of package */ |
||||
#undef PACKAGE |
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */ |
||||
#undef PACKAGE_BUGREPORT |
||||
|
||||
/* Define to the full name of this package. */ |
||||
#define PACKAGE_NAME "ccminer" |
||||
|
||||
/* Define to the full name and version of this package. */ |
||||
#define PACKAGE_STRING "ccminer 2014.03.23" |
||||
|
||||
/* Define to the one symbol short name of this package. */ |
||||
#undef PACKAGE_TARNAME |
||||
|
||||
/* Define to the home page for this package. */ |
||||
#undef PACKAGE_URL |
||||
|
||||
/* Define to the version of this package. */ |
||||
#define PACKAGE_VERSION "2014.03.23" |
||||
|
||||
/* If using the C implementation of alloca, define if you know the
|
||||
direction of stack growth for your system; otherwise it will be |
||||
automatically deduced at runtime. |
||||
STACK_DIRECTION > 0 => grows toward higher addresses |
||||
STACK_DIRECTION < 0 => grows toward lower addresses |
||||
STACK_DIRECTION = 0 => direction of growth unknown */ |
||||
#undef STACK_DIRECTION |
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */ |
||||
#undef STDC_HEADERS |
||||
|
||||
/* Define to 1 if AVX assembly is available. */ |
||||
#undef USE_AVX |
||||
|
||||
/* Define to 1 if XOP assembly is available. */ |
||||
#undef USE_XOP |
||||
|
||||
/* Version number of package */ |
||||
#undef VERSION |
||||
|
||||
/* Define curl_free() as free() if our version of curl lacks curl_free. */ |
||||
#undef curl_free |
||||
|
||||
/* Define to `unsigned int' if <sys/types.h> does not define. */ |
||||
#undef size_t |
||||
|
@ -0,0 +1,463 @@
@@ -0,0 +1,463 @@
|
||||
// Auf Groestlcoin spezialisierte Version von Groestl |
||||
|
||||
#include <cuda.h> |
||||
#include "cuda_runtime.h" |
||||
#include "device_launch_parameters.h" |
||||
|
||||
#include <stdio.h> |
||||
#include <memory.h> |
||||
|
||||
#define USE_SHARED 1 |
||||
|
||||
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); |
||||
|
||||
// Folgende Definitionen später durch header ersetzen |
||||
typedef unsigned char uint8_t; |
||||
typedef unsigned int uint32_t; |
||||
typedef unsigned long long uint64_t; |
||||
|
||||
// Share target in constant memory, and the per-GPU device buffers that receive a found nonce (defined in another file)
||||
__constant__ uint32_t pTarget[8]; // Single GPU |
||||
extern uint32_t *d_resultNonce[8]; |
||||
|
||||
// globaler Speicher für unsere Ergebnisse |
||||
uint32_t *d_hashGROESTLCOINoutput[8]; |
||||
|
||||
__constant__ uint32_t groestlcoin_gpu_state[32]; |
||||
__constant__ uint32_t groestlcoin_gpu_msg[32]; |
||||
__constant__ uint32_t sha256coin_gpu_constantTable[64]; |
||||
__constant__ uint32_t sha256coin_gpu_register[8]; |
||||
|
||||
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) |
||||
|
||||
#define PC32up(j, r) ((uint32_t)((j) + (r))) |
||||
#define PC32dn(j, r) 0 |
||||
#define QC32up(j, r) 0xFFFFFFFF |
||||
#define QC32dn(j, r) (((uint32_t)(r) << 24) ^ SPH_T32(~((uint32_t)(j) << 24))) |
||||
|
||||
#define B32_0(x) ((x) & 0xFF) |
||||
#define B32_1(x) (((x) >> 8) & 0xFF) |
||||
#define B32_2(x) (((x) >> 16) & 0xFF) |
||||
#define B32_3(x) ((x) >> 24) |
||||
|
||||
#define SPH_C32(x) ((uint32_t)(x ## U)) |
||||
#define C32e(x) ((SPH_C32(x) >> 24) \ |
||||
| ((SPH_C32(x) >> 8) & SPH_C32(0x0000FF00)) \ |
||||
| ((SPH_C32(x) << 8) & SPH_C32(0x00FF0000)) \ |
||||
| ((SPH_C32(x) << 24) & SPH_C32(0xFF000000))) |
||||
|
||||
#if USE_SHARED |
||||
#define T0up(x) (*((uint32_t*)mixtabs + ( (x)))) |
||||
#define T0dn(x) (*((uint32_t*)mixtabs + (256+(x)))) |
||||
#define T1up(x) (*((uint32_t*)mixtabs + (512+(x)))) |
||||
#define T1dn(x) (*((uint32_t*)mixtabs + (768+(x)))) |
||||
#define T2up(x) (*((uint32_t*)mixtabs + (1024+(x)))) |
||||
#define T2dn(x) (*((uint32_t*)mixtabs + (1280+(x)))) |
||||
#define T3up(x) (*((uint32_t*)mixtabs + (1536+(x)))) |
||||
#define T3dn(x) (*((uint32_t*)mixtabs + (1792+(x)))) |
||||
#else |
||||
#define T0up(x) tex1Dfetch(t0up1, x) |
||||
#define T0dn(x) tex1Dfetch(t0dn1, x) |
||||
#define T1up(x) tex1Dfetch(t1up1, x) |
||||
#define T1dn(x) tex1Dfetch(t1dn1, x) |
||||
#define T2up(x) tex1Dfetch(t2up1, x) |
||||
#define T2dn(x) tex1Dfetch(t2dn1, x) |
||||
#define T3up(x) tex1Dfetch(t3up1, x) |
||||
#define T3dn(x) tex1Dfetch(t3dn1, x) |
||||
#endif |
||||
texture<unsigned int, 1, cudaReadModeElementType> t0up1; |
||||
texture<unsigned int, 1, cudaReadModeElementType> t0dn1; |
||||
texture<unsigned int, 1, cudaReadModeElementType> t1up1; |
||||
texture<unsigned int, 1, cudaReadModeElementType> t1dn1; |
||||
texture<unsigned int, 1, cudaReadModeElementType> t2up1; |
||||
texture<unsigned int, 1, cudaReadModeElementType> t2dn1; |
||||
texture<unsigned int, 1, cudaReadModeElementType> t3up1; |
||||
texture<unsigned int, 1, cudaReadModeElementType> t3dn1; |
||||
|
||||
extern uint32_t T0up_cpu[]; |
||||
extern uint32_t T0dn_cpu[]; |
||||
extern uint32_t T1up_cpu[]; |
||||
extern uint32_t T1dn_cpu[]; |
||||
extern uint32_t T2up_cpu[]; |
||||
extern uint32_t T2dn_cpu[]; |
||||
extern uint32_t T3up_cpu[]; |
||||
extern uint32_t T3dn_cpu[]; |
||||
extern uint32_t sha256_cpu_hashTable[]; |
||||
extern uint32_t sha256_cpu_constantTable[]; |
||||
|
||||
/*
 * SHA-256 style bit helpers and a 32-bit byte swap.
 *
 * Every macro argument is parenthesized so that compound expressions such as
 * SWAB32(a | b) or Ch(x ^ y, ...) expand with the intended precedence.
 * Arguments are still evaluated more than once, so do not pass expressions
 * with side effects.  All operands are expected to be 32-bit unsigned values.
 */
#define S(x, n)      (((x) >> (n)) | ((x) << (32 - (n))))   /* rotate right by n */
#define R(x, n)      ((x) >> (n))                            /* logical shift right by n */
#define Ch(x, y, z)  (((x) & ((y) ^ (z))) ^ (z))
#define Maj(x, y, z) (((x) & ((y) | (z))) | ((y) & (z)))
#define S0(x)        (S(x, 2) ^ S(x, 13) ^ S(x, 22))
#define S1(x)        (S(x, 6) ^ S(x, 11) ^ S(x, 25))
#define s0(x)        (S(x, 7) ^ S(x, 18) ^ R(x, 3))
#define s1(x)        (S(x, 17) ^ S(x, 19) ^ R(x, 10))

/* Reverse the byte order of a 32-bit word. */
#define SWAB32(x) ( (((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \
                    (((x) & 0x00FF0000) >> 8)  | (((x) & 0xFF000000) >> 24) )
||||
|
||||
|
||||
// P permutation used by the Groestl hash below: 14 rounds applied in place to
// the 32-word state `a`.
//
// `mixtabs` must keep this exact name: when USE_SHARED is enabled the
// T?up/T?dn lookup macros reference it directly.  It is unused otherwise.
__device__ void groestlcoin_perm_P(uint32_t *a, char *mixtabs)
{
    uint32_t next[32];

    for (int round = 0; round < 14; ++round)
    {
        // Add the round constant.  Only the even word of each pair changes,
        // because PC32dn is defined as 0.
#pragma unroll 16
        for (int col = 0; col < 16; ++col)
            a[2 * col] ^= PC32up(col * 0x10, round);

        // Table-driven round body (labelled "RBTT" by the original author):
        // pick one byte from each of eight state words, then combine the
        // table entries for the even and the odd output word.
#pragma unroll 16
        for (int w = 0; w < 32; w += 2)
        {
            const uint32_t b0 = B32_0(a[ w       & 0x1f]);
            const uint32_t b1 = B32_1(a[(w +  2) & 0x1f]);
            const uint32_t b2 = B32_2(a[(w +  4) & 0x1f]);
            const uint32_t b3 = B32_3(a[(w +  6) & 0x1f]);
            const uint32_t b4 = B32_0(a[(w +  9) & 0x1f]);
            const uint32_t b5 = B32_1(a[(w + 11) & 0x1f]);
            const uint32_t b6 = B32_2(a[(w + 13) & 0x1f]);
            const uint32_t b7 = B32_3(a[(w + 23) & 0x1f]);

            next[w]     = T0up(b0) ^ T1up(b1) ^ T2up(b2) ^ T3up(b3) ^
                          T0dn(b4) ^ T1dn(b5) ^ T2dn(b6) ^ T3dn(b7);

            next[w + 1] = T0dn(b0) ^ T1dn(b1) ^ T2dn(b2) ^ T3dn(b3) ^
                          T0up(b4) ^ T1up(b5) ^ T2up(b6) ^ T3up(b7);
        }

        // Commit the new state for the next round.
#pragma unroll 32
        for (int i = 0; i < 32; ++i)
            a[i] = next[i];
    }
}
||||
|
||||
// Q permutation used by the Groestl hash below: 14 rounds applied in place to
// the 32-word state `a`.  It differs from groestlcoin_perm_P in the round
// constants and in which state words feed each output word.
//
// `mixtabs` must keep this exact name: when USE_SHARED is enabled the
// T?up/T?dn lookup macros reference it directly.  It is unused otherwise.
__device__ void groestlcoin_perm_Q(uint32_t *a, char *mixtabs)
{
    for (int round = 0; round < 14; ++round)
    {
        uint32_t next[32];

        // Add the round constant to both words of every pair.
#pragma unroll 16
        for (int col = 0; col < 16; ++col)
        {
            a[2 * col]     ^= QC32up(col * 0x10, round);
            a[2 * col + 1] ^= QC32dn(col * 0x10, round);
        }

        // Table-driven round body (labelled "RBTT" by the original author).
#pragma unroll 16
        for (int w = 0; w < 32; w += 2)
        {
            const uint32_t b0 = B32_0(a[(w +  2) & 0x1f]);
            const uint32_t b1 = B32_1(a[(w +  6) & 0x1f]);
            const uint32_t b2 = B32_2(a[(w + 10) & 0x1f]);
            const uint32_t b3 = B32_3(a[(w + 22) & 0x1f]);
            const uint32_t b4 = B32_0(a[(w +  1) & 0x1f]);
            const uint32_t b5 = B32_1(a[(w +  5) & 0x1f]);
            const uint32_t b6 = B32_2(a[(w +  9) & 0x1f]);
            const uint32_t b7 = B32_3(a[(w + 13) & 0x1f]);

            next[w]     = T0up(b0) ^ T1up(b1) ^ T2up(b2) ^ T3up(b3) ^
                          T0dn(b4) ^ T1dn(b5) ^ T2dn(b6) ^ T3dn(b7);

            next[w + 1] = T0dn(b0) ^ T1dn(b1) ^ T2dn(b2) ^ T3dn(b3) ^
                          T0up(b4) ^ T1up(b5) ^ T2up(b6) ^ T3up(b7);
        }

        // Commit the new state for the next round.
#pragma unroll 32
        for (int i = 0; i < 32; ++i)
            a[i] = next[i];
    }
}
||||
// Runs one complete hash pass over a single 32-word block, exactly as the
// kernel needs it twice: mix the message into the state with the P and Q
// permutations, then apply the final P-based step.
//
// On return `state` holds the result (the kernel reads words 16..31).
// `message` is clobbered because Q permutes it in place.
__device__ __forceinline__ void groestlcoin_hash_block(uint32_t *message, uint32_t *state, char *mixtabs)
{
    uint32_t g[32];

#pragma unroll 32
    for (int u = 0; u < 32; u++)
        g[u] = message[u] ^ state[u];

    groestlcoin_perm_P(g, mixtabs);
    groestlcoin_perm_Q(message, mixtabs);

#pragma unroll 32
    for (int u = 0; u < 32; u++)
    {
        state[u] ^= g[u] ^ message[u];
        g[u] = state[u];
    }

    groestlcoin_perm_P(g, mixtabs);

#pragma unroll 32
    for (int u = 0; u < 32; u++)
        state[u] ^= g[u];
}

// Mining kernel: each thread hashes the prepared block header with its own
// nonce (startNounce + global thread index), hashes the 64-byte result a
// second time, and compares the outcome against pTarget.
//
//   threads      number of nonces to test; surplus threads exit early
//   startNounce  nonce assigned to global thread 0
//   outputHash   unused (the debug copy of the hashes was disabled)
//   resNounce    device word receiving the smallest nonce that met the target;
//                the host presets it to 0xFFFFFFFF before each launch
#if USE_SHARED
__global__ void __launch_bounds__(256)
#else
__global__ void
#endif
groestlcoin_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce)
{
#if USE_SHARED
    // Stage the eight 256-entry lookup tables in shared memory.  Each thread
    // copies one entry of every table, so the block needs 256 threads for the
    // tables to be filled completely.
    extern __shared__ char mixtabs[];
    uint32_t *tables = (uint32_t*)mixtabs;

    tables[       threadIdx.x] = tex1Dfetch(t0up1, threadIdx.x);
    tables[ 256 + threadIdx.x] = tex1Dfetch(t0dn1, threadIdx.x);
    tables[ 512 + threadIdx.x] = tex1Dfetch(t1up1, threadIdx.x);
    tables[ 768 + threadIdx.x] = tex1Dfetch(t1dn1, threadIdx.x);
    tables[1024 + threadIdx.x] = tex1Dfetch(t2up1, threadIdx.x);
    tables[1280 + threadIdx.x] = tex1Dfetch(t2dn1, threadIdx.x);
    tables[1536 + threadIdx.x] = tex1Dfetch(t3up1, threadIdx.x);
    tables[1792 + threadIdx.x] = tex1Dfetch(t3dn1, threadIdx.x);

    __syncthreads();
#else
    // The lookup macros fetch from textures in this configuration, so the
    // permutations never dereference the table pointer.
    char *mixtabs = NULL;
#endif

    int thread = (blockDim.x * blockIdx.x + threadIdx.x);
    if (thread >= threads)
        return;     // safe: the only __syncthreads() is already behind us

    uint32_t message[32];
    uint32_t state[32];

    // ---- first pass: the padded block header --------------------------------
#pragma unroll 32
    for (int k = 0; k < 32; k++)
    {
        state[k] = groestlcoin_gpu_state[k];
        message[k] = groestlcoin_gpu_msg[k];
    }

    // Word 19 of the header is the nonce; it is stored byte-swapped.
    uint32_t nounce = startNounce + thread;
    message[19] = SWAB32(nounce);

    groestlcoin_hash_block(message, state, mixtabs);

    // ---- second pass: hash the 64-byte result of the first pass -------------
    // Copy the result out of `state` before resetting it to the initial value.
#pragma unroll 16
    for (int k = 0; k < 16; k++)
        message[k] = state[k + 16];

#pragma unroll 32
    for (int k = 0; k < 32; k++)
        state[k] = groestlcoin_gpu_state[k];

#pragma unroll 16
    for (int k = 0; k < 16; k++)
        message[k + 16] = 0;

    // Padding for a 64-byte message: 0x80 marker directly after the data;
    // the last word is presumably the big-endian block counter (1) -- confirm
    // against the Groestl specification.
    message[16] = 0x80;
    message[31] = 0x01000000;

    groestlcoin_hash_block(message, state, mixtabs);

    // ---- target test ---------------------------------------------------------
    // Compare state[16..23] with the target, most significant word (index 7)
    // first.  A hash equal to the target in all eight words is accepted.
    bool rc = true;
    for (int i = 7; i >= 0; i--) {
        if (state[i + 16] > pTarget[i]) {
            rc = false;
            break;
        }
        if (state[i + 16] < pTarget[i]) {
            rc = true;
            break;
        }
    }

    // Several threads may find a share in the same launch; atomicMin keeps the
    // smallest nonce without the race of a separate compare and store.
    if (rc)
        atomicMin(&resNounce[0], nounce);
}
||||
|
||||
// Uploads a host table to newly allocated device memory and binds it to the
// 1D texture reference `texname` (unnormalized coordinates, point filtering,
// clamped addressing).
//
// The macro declares the device pointer `texmem` in the caller's scope, so it
// can be used only once per name and scope.  No CUDA return code is checked.
#define texDef(texname, texmem, texsource, texsize)                           \
    unsigned int *texmem;                                                     \
    cudaMalloc(&texmem, texsize);                                             \
    cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice);           \
    texname.normalized = 0;                                                   \
    texname.filterMode = cudaFilterModePoint;                                 \
    texname.addressMode[0] = cudaAddressModeClamp;                            \
    {                                                                         \
        cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned int>();   \
        cudaBindTexture(NULL, &texname, texmem, &desc, texsize);              \
    }
||||
|
||||
// Setup-Funktionen |
||||
// One-time setup for the GPU used by mining thread `thr_id`.
//
// Selects the device (the thread id doubles as the CUDA device index), uploads
// the eight lookup tables and binds them to textures, copies the SHA-256
// tables and the initial hash state to constant memory, and allocates the
// per-thread result buffers.  `threads` is the maximum number of nonces tested
// per launch.  No CUDA return code is checked.
__host__ void groestlcoin_cpu_init(int thr_id, int threads)
{
    const size_t tableBytes = 256 * sizeof(uint32_t);

    cudaSetDevice(thr_id);
    cudaDeviceSetCacheConfig( cudaFuncCachePreferShared );

    // Lookup tables -> device memory -> texture references.
    texDef(t0up1, d_T0up, T0up_cpu, tableBytes);
    texDef(t0dn1, d_T0dn, T0dn_cpu, tableBytes);
    texDef(t1up1, d_T1up, T1up_cpu, tableBytes);
    texDef(t1dn1, d_T1dn, T1dn_cpu, tableBytes);
    texDef(t2up1, d_T2up, T2up_cpu, tableBytes);
    texDef(t2dn1, d_T2dn, T2dn_cpu, tableBytes);
    texDef(t3up1, d_T3up, T3up_cpu, tableBytes);
    texDef(t3dn1, d_T3dn, T3dn_cpu, tableBytes);

    // SHA-256 round constants and start vector.
    cudaMemcpyToSymbol(sha256coin_gpu_constantTable, sha256_cpu_constantTable, 64 * sizeof(uint32_t));
    cudaMemcpyToSymbol(sha256coin_gpu_register, sha256_cpu_hashTable, 8 * sizeof(uint32_t));

    // Initial hash state: all zero except the last word.
    uint32_t groestl_state_init[32] = { 0 };
    groestl_state_init[31] = 0x20000;
    cudaMemcpyToSymbol(groestlcoin_gpu_state, groestl_state_init, sizeof(groestl_state_init));

    // Single word that receives a found nonce.
    cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));

    // Room for eight output words per thread (debugging only).
    cudaMalloc(&d_hashGROESTLCOINoutput[thr_id], 8 * sizeof(uint32_t) * threads);
}
||||
|
||||
// Prepares the GPU for a new piece of work.
//
//   thr_id     mining thread; selects which result buffer is reset
//   data       80-byte block header, already endian-swapped by the caller
//   pTargetIn  share target, eight 32-bit words
//
// The header is expanded to one padded 32-word message block and copied to
// constant memory; the kernel inserts the nonce per thread.  No CPU-side
// hashing is needed because the whole hash is computed on the GPU.
__host__ void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
{
    uint32_t msgBlock[32] = { 0 };

    memcpy(msgBlock, data, 80);

    // Padding directly after the 80 header bytes.
    msgBlock[20] = 0x80;
    msgBlock[31] = 0x01000000;

    cudaMemcpyToSymbol(groestlcoin_gpu_msg, msgBlock, sizeof(msgBlock));

    // 0xFFFFFFFF in the result word means "nothing found yet".
    cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));

    cudaMemcpyToSymbol(pTarget, pTargetIn, 8 * sizeof(uint32_t));
}
||||
|
||||
// Launches the mining kernel over `threads` consecutive nonces starting at
// `startNounce` and copies the result word back to `*nounce`.
//
// `*nounce` is 0xFFFFFFFF when no thread met the target.  `outputHashes` is
// not written; the debug copy of the hashes is disabled.
__host__ void groestlcoin_cpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce)
{
#if USE_SHARED
    // Must equal the lookup-table size (256 entries): each thread stages one
    // entry per table.  Do not change.
    const int threadsperblock = 256;
    // Eight tables of 256 words in dynamic shared memory.
    const size_t shared_size = 8 * 256 * sizeof(uint32_t);
#else
    const int threadsperblock = 512;    // free to tune
    const size_t shared_size = 0;
#endif

    // Round the block count up so every requested nonce gets a thread.
    const int numBlocks = (threads + threadsperblock - 1) / threadsperblock;
    dim3 grid(numBlocks);
    dim3 block(threadsperblock);

    // Reset the result word to the "nothing found" marker.
    cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));

    groestlcoin_gpu_hash<<<grid, block, shared_size>>>(threads, startNounce, d_hashGROESTLCOINoutput[thr_id], d_resultNonce[thr_id]);

    // Described by the original author as a strategic sleep to lower CPU load;
    // the helper is defined elsewhere.
    MyStreamSynchronize(NULL, 0, thr_id);

    cudaMemcpy(nounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
}
@ -0,0 +1,8 @@
@@ -0,0 +1,8 @@
|
||||
/* Host-side interface of the Groestlcoin CUDA implementation. */
#ifndef CUDA_GROESTLCOIN_H
#define CUDA_GROESTLCOIN_H

#include <stdint.h>   /* uint32_t: keeps this header self-contained */

/* One-time setup for the GPU used by mining thread thr_id; `threads` is the
 * maximum number of nonces tested per launch. */
void groestlcoin_cpu_init(int thr_id, int threads);

/* Uploads the 80-byte (endian-swapped) block header `data` and the eight-word
 * share target `pTargetIn` for subsequent hash launches. */
void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn);

/* Tests `threads` nonces starting at startNounce.  *nounce receives a nonce
 * that met the target, or 0xFFFFFFFF when none was found.  outputHashes is
 * currently not written. */
void groestlcoin_cpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce);

#endif /* CUDA_GROESTLCOIN_H */
@ -0,0 +1,175 @@
@@ -0,0 +1,175 @@
|
||||
#include "uint256.h" |
||||
#include "sph_groestl.h" |
||||
|
||||
#include "cpuminer-config.h" |
||||
#include "miner.h" |
||||
|
||||
#include <string.h> |
||||
#include <stdint.h> |
||||
#include "cuda_groestlcoin.h" |
||||
#include <openssl/sha.h> |
||||
|
||||
/* Reverse the byte order of a 32-bit value (argument is evaluated four times). */
#define SWAP32(x) \
    ((((x) >> 24) & 0x000000ffu) | (((x) >>  8) & 0x0000ff00u) | \
     (((x) <<  8) & 0x00ff0000u) | (((x) << 24) & 0xff000000u))
||||
|
||||
void sha256func(unsigned char *hash, const unsigned char *data, int len) |
||||
{ |
||||
uint32_t S[16], T[16]; |
||||
int i, r; |
||||
|
||||
sha256_init(S); |
||||
for (r = len; r > -9; r -= 64) { |
||||
if (r < 64) |
||||
memset(T, 0, 64); |
||||
memcpy(T, data + len - r, r > 64 ? 64 : (r < 0 ? 0 : r)); |
||||
if (r >= 0 && r < 64) |
||||
((unsigned char *)T)[r] = 0x80; |
||||
for (i = 0; i < 16; i++) |
||||
T[i] = be32dec(T + i); |
||||
if (r < 56) |
||||
T[15] = 8 * len; |
||||
sha256_transform(S, T, 0); |
||||
} |
||||
/*
|
||||
memcpy(S + 8, sha256d_hash1 + 8, 32); |
||||
sha256_init(T); |
||||
sha256_transform(T, S, 0); |
||||
*/ |
||||
for (i = 0; i < 8; i++) |
||||
be32enc((uint32_t *)hash + i, T[i]); |
||||
} |
||||
|
||||
static void groestlhash(void *state, const void *input) |
||||
{ |
||||
// Tryout GPU-groestl
|
||||
|
||||
sph_groestl512_context ctx_groestl[2]; |
||||
static unsigned char pblank[1]; |
||||
int ii; |
||||
uint32_t mask = 8; |
||||
uint32_t zero = 0; |
||||
|
||||
|
||||
//these uint512 in the c++ source of the client are backed by an array of uint32
|
||||
uint32_t hashA[16], hashB[16]; |
||||
|
||||
|
||||
sph_groestl512_init(&ctx_groestl[0]); |
||||
sph_groestl512 (&ctx_groestl[0], input, 80); //6
|
||||
sph_groestl512_close(&ctx_groestl[0], hashA); //7
|
||||
|
||||
sph_groestl512_init(&ctx_groestl[1]); |
||||
sph_groestl512 (&ctx_groestl[1], hashA, 64); //6
|
||||
sph_groestl512_close(&ctx_groestl[1], hashB); //7
|
||||
|
||||
memcpy(state, hashB, 32); |
||||
} |
||||
|
||||
|
||||
|
||||
extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget, |
||||
uint32_t max_nonce, unsigned long *hashes_done) |
||||
{ |
||||
uint32_t start_nonce = pdata[19]++; |
||||
const uint32_t Htarg = ptarget[7]; |
||||
const uint32_t throughPut = 4096 * 128; |
||||
//const uint32_t throughPut = 1;
|
||||
int i; |
||||
uint32_t *outputHash = (uint32_t*)malloc(throughPut * 16 * sizeof(uint32_t)); |
||||
|
||||
// init
|
||||
static bool init[8] = { false, false, false, false, false, false, false, false }; |
||||
if(!init[thr_id]) |
||||
{ |
||||
groestlcoin_cpu_init(thr_id, throughPut); |
||||
init[thr_id] = true; |
||||
} |
||||
|
||||
// Endian Drehung ist notwendig
|
||||
//char testdata[] = {"\x70\x00\x00\x00\x5d\x38\x5b\xa1\x14\xd0\x79\x97\x0b\x29\xa9\x41\x8f\xd0\x54\x9e\x7d\x68\xa9\x5c\x7f\x16\x86\x21\xa3\x14\x20\x10\x00\x00\x00\x00\x57\x85\x86\xd1\x49\xfd\x07\xb2\x2f\x3a\x8a\x34\x7c\x51\x6d\xe7\x05\x2f\x03\x4d\x2b\x76\xff\x68\xe0\xd6\xec\xff\x9b\x77\xa4\x54\x89\xe3\xfd\x51\x17\x32\x01\x1d\xf0\x73\x10\x00"};
|
||||
//pdata = (uint32_t*)testdata;
|
||||
uint32_t endiandata[32]; |
||||
for (int kk=0; kk < 32; kk++) |
||||
be32enc(&endiandata[kk], pdata[kk]); |
||||
|
||||
// Context mit dem Endian gedrehten Blockheader vorbereiten (Nonce wird später ersetzt)
|
||||
groestlcoin_cpu_setBlock(thr_id, endiandata, (void*)ptarget); |
||||
|
||||
do { |
||||
// GPU
|
||||
uint32_t foundNounce = 0xFFFFFFFF; |
||||
|
||||
groestlcoin_cpu_hash(thr_id, throughPut, pdata[19], outputHash, &foundNounce); |
||||
|
||||
/*
|
||||
{ |
||||
for(i=0;i<throughPut;i++) |
||||
{ |
||||
uint32_t tmpHash[8]; |
||||
endiandata[19] = SWAP32(pdata[19]); |
||||
groestlhash(tmpHash, endiandata); |
||||
|
||||
int ii; |
||||
printf("result GPU: "); |
||||
for (ii=0; ii < 32; ii++) |
||||
{ |
||||
printf ("%.2x",((uint8_t*)&outputHash[8*i])[ii]); |
||||
}; |
||||
printf ("\n"); |
||||
|
||||
|
||||
groestlhash(tmpHash, endiandata); |
||||
printf("result CPU: "); |
||||
for (ii=0; ii < 32; ii++) |
||||
{ |
||||
printf ("%.2x",((uint8_t*)tmpHash)[ii]); |
||||
}; |
||||
|
||||
|
||||
} |
||||
exit(0); |
||||
} |
||||
*/ |
||||
if(foundNounce < 0xffffffff) |
||||
{ |
||||
uint32_t tmpHash[8]; |
||||
endiandata[19] = SWAP32(foundNounce); |
||||
groestlhash(tmpHash, endiandata); |
||||
if (((tmpHash[7]&0xFFFFFF00)==0) && |
||||
fulltest(tmpHash, ptarget)) { |
||||
pdata[19] = foundNounce; |
||||
*hashes_done = foundNounce - start_nonce; |
||||
free(outputHash); |
||||
return true; |
||||
} |
||||
|
||||
foundNounce = 0xffffffff; |
||||
/*
|
||||
int ii; |
||||
printf("result GPU: "); |
||||
for (ii=0; ii < 32; ii++) |
||||
{ |
||||
printf ("%.2x",((uint8_t*)&outputHash[0])[ii]); |
||||
}; |
||||
printf ("\n"); |
||||
printf("result CPU: "); |
||||
for (ii=0; ii < 32; ii++) |
||||
{ |
||||
printf ("%.2x",((uint8_t*)tmpHash)[ii]); |
||||
}; |
||||
printf ("\n"); |
||||
*/ |
||||
} |
||||
|
||||
if (pdata[19] + throughPut < pdata[19]) |
||||
pdata[19] = max_nonce; |
||||
else pdata[19] += throughPut; |
||||
|
||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart); |
||||
|
||||
*hashes_done = pdata[19] - start_nonce; |
||||
free(outputHash); |
||||
return 0; |
||||
} |
||||
|
Loading…
Reference in new issue