diff --git a/Makefile.am b/Makefile.am index ca58d96..c523ffa 100644 --- a/Makefile.am +++ b/Makefile.am @@ -32,13 +32,6 @@ ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@ ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@ ccminer_CPPFLAGS = -msse2 @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME +# we're now targeting all major compute architectures within one binary. .cu.o: - $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_35 --maxrregcount=124 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< - -## Thrust needs Compute 2.0 minimum -#heavy.o: heavy.cu -# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< -# -#cuda_hefty1.o: cuda_hefty1.cu -# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< - + $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -gencode=arch=compute_10,code=\"sm_10,compute_10\" -gencode=arch=compute_20,code=\"sm_20,compute_20\" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< diff --git a/Makefile.in b/Makefile.in index f4a36fa..0925b1f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -1033,14 +1033,9 @@ uninstall-am: uninstall-binPROGRAMS uninstall uninstall-am uninstall-binPROGRAMS +# we're now targeting all major compute architectures within one binary. 
.cu.o: - $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_35 --maxrregcount=124 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< - -#heavy.o: heavy.cu -# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< -# -#cuda_hefty1.o: cuda_hefty1.cu -# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< + $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -gencode=arch=compute_10,code=\"sm_10,compute_10\" -gencode=arch=compute_20,code=\"sm_20,compute_20\" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $< # Tell versions [3.59,3.63) of GNU make to not export all variables. # Otherwise a system limit (for SysV at least) may be exceeded. diff --git a/README.txt b/README.txt index fe4b9ac..31dbac5 100644 --- a/README.txt +++ b/README.txt @@ -1,5 +1,5 @@ -ccMiner release 0.3 (Mar 23th 2014) - Groestlcoin Release +ccMiner release 0.4 (Mar 24th 2014) - Groestlcoin Pool Release ------------------------------------------------------------- *************************************************************** @@ -107,7 +107,13 @@ from your old clunkers. >>> RELEASE HISTORY <<< - Match, 23 2014 added Groestlcoin support. stratum status unknown + March, 24 2014 fixed Groestl pool support + + went back to Compute 1.x for cuda_hefty1.cu kernel by + default after numerous reports of ccminer v0.2/v0.3 + not working with HeavyCoin for some people. + + March, 23 2014 added Groestlcoin support. 
stratum status unknown (the only pool is currently down for fixing issues) March, 21 2014 use of shared memory in Fugue256 kernel boosts hash rates diff --git a/ccminer.vcxproj b/ccminer.vcxproj index e12cfb2..029fceb 100644 --- a/ccminer.vcxproj +++ b/ccminer.vcxproj @@ -277,7 +277,16 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)" - + + compute_10,sm_10 + compute_10,sm_10 + 124 + 124 + compute_10,sm_10 + 124 + compute_10,sm_10 + 124 + diff --git a/configure b/configure index cb6956a..c565c17 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.68 for ccminer 2014.03.23. +# Generated by GNU Autoconf 2.68 for ccminer 2014.03.24. # # # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, @@ -557,8 +557,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='ccminer' PACKAGE_TARNAME='ccminer' -PACKAGE_VERSION='2014.03.23' -PACKAGE_STRING='ccminer 2014.03.23' +PACKAGE_VERSION='2014.03.24' +PACKAGE_STRING='ccminer 2014.03.24' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1297,7 +1297,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures ccminer 2014.03.23 to adapt to many kinds of systems. +\`configure' configures ccminer 2014.03.24 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1368,7 +1368,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of ccminer 2014.03.23:";; + short | recursive ) echo "Configuration of ccminer 2014.03.24:";; esac cat <<\_ACEOF @@ -1469,7 +1469,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -ccminer configure 2014.03.23 +ccminer configure 2014.03.24 generated by GNU Autoconf 2.68 Copyright (C) 2010 Free Software Foundation, Inc. 
@@ -1972,7 +1972,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by ccminer $as_me 2014.03.23, which was +It was created by ccminer $as_me 2014.03.24, which was generated by GNU Autoconf 2.68. Invocation command line was $ $0 $@ @@ -2901,7 +2901,7 @@ fi # Define the identity of the package. PACKAGE='ccminer' - VERSION='2014.03.23' + VERSION='2014.03.24' cat >>confdefs.h <<_ACEOF @@ -7118,7 +7118,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by ccminer $as_me 2014.03.23, which was +This file was extended by ccminer $as_me 2014.03.24, which was generated by GNU Autoconf 2.68. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -7184,7 +7184,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -ccminer config.status 2014.03.23 +ccminer config.status 2014.03.24 configured by $0, generated by GNU Autoconf 2.68, with options \\"\$ac_cs_config\\" diff --git a/configure.ac b/configure.ac index 0d75c39..18063f9 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([ccminer], [2014.03.23]) +AC_INIT([ccminer], [2014.03.24]) AC_PREREQ([2.59c]) AC_CANONICAL_SYSTEM diff --git a/cpu-miner.c b/cpu-miner.c index 3877f95..e88b79e 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -669,7 +669,7 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work) if (opt_algo == ALGO_HEAVY) heavycoin_hash(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size); else - if (opt_algo == ALGO_FUGUE256) + if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL) SHA256((unsigned char*)sctx->job.coinbase, sctx->job.coinbase_size, (unsigned char*)merkle_root); else sha256d(merkle_root, 
sctx->job.coinbase, (int)sctx->job.coinbase_size); @@ -719,7 +719,7 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work) free(xnonce2str); } - if (opt_algo == ALGO_FUGUE256) + if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL) diff_to_target(work->target, sctx->job.diff / 256.0); else diff_to_target(work->target, sctx->job.diff); @@ -1346,7 +1346,7 @@ static void signal_handler(int sig) } #endif -#define PROGRAM_VERSION "0.2" +#define PROGRAM_VERSION "0.4" int main(int argc, char *argv[]) { struct thr_info *thr; diff --git a/cpuminer-config.h b/cpuminer-config.h index 9196f3e..50914ab 100644 --- a/cpuminer-config.h +++ b/cpuminer-config.h @@ -152,7 +152,7 @@ #define PACKAGE_NAME "ccminer" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "ccminer 2014.03.23" +#define PACKAGE_STRING "ccminer 2014.03.24" /* Define to the one symbol short name of this package. */ #undef PACKAGE_TARNAME @@ -161,7 +161,7 @@ #undef PACKAGE_URL /* Define to the version of this package. 
*/ -#define PACKAGE_VERSION "2014.03.23" +#define PACKAGE_VERSION "2014.03.24" /* If using the C implementation of alloca, define if you know the direction of stack growth for your system; otherwise it will be diff --git a/cuda_groestlcoin.cu b/cuda_groestlcoin.cu index 606ae9f..442b1dc 100644 --- a/cuda_groestlcoin.cu +++ b/cuda_groestlcoin.cu @@ -82,8 +82,6 @@ extern uint32_t T2up_cpu[]; extern uint32_t T2dn_cpu[]; extern uint32_t T3up_cpu[]; extern uint32_t T3dn_cpu[]; -extern uint32_t sha256_cpu_hashTable[]; -extern uint32_t sha256_cpu_constantTable[]; #define S(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) #define R(x, n) ((x) >> (n)) @@ -212,15 +210,14 @@ __global__ void // GROESTL uint32_t message[32]; uint32_t state[32]; - - // SHA - // jeder thread in diesem Block bekommt sein eigenes W Array im Shared memory uint32_t g[32]; #pragma unroll 32 for(int k=0;k<32;k++) { + // TODO: the zero pre-initialisation does not necessarily have to be read from + // constant memory. That is a waste of bandwidth. state[k] = groestlcoin_gpu_state[k]; message[k] = groestlcoin_gpu_msg[k]; } @@ -230,12 +227,12 @@ __global__ void #pragma unroll 32 for(int u=0;u<32;u++) - g[u] = message[u] ^ state[u]; + g[u] = message[u] ^ state[u]; // TODO: state is 0 almost everywhere. // Perm #if USE_SHARED - groestlcoin_perm_P(g, mixtabs); - groestlcoin_perm_Q(message, mixtabs); + groestlcoin_perm_P(g, mixtabs); // TODO: g[] is almost exactly message[] + groestlcoin_perm_Q(message, mixtabs); // can this be exploited? #else groestlcoin_perm_P(g, NULL); groestlcoin_perm_Q(message, NULL); @@ -244,6 +241,8 @@ __global__ void #pragma unroll 32 for(int u=0;u<32;u++) { + // TODO: could the xor with g[u] be moved up to right after the groestlcoin_perm_P function, + // which might reduce register usage?
state[u] ^= g[u] ^ message[u]; g[u] = state[u]; } @@ -373,17 +372,10 @@ __host__ void groestlcoin_cpu_init(int thr_id, int threads) texDef(t3up1, d_T3up, T3up_cpu, sizeof(uint32_t)*256); texDef(t3dn1, d_T3dn, T3dn_cpu, sizeof(uint32_t)*256); - // Kopiere die Hash-Tabellen in den GPU-Speicher - cudaMemcpyToSymbol( sha256coin_gpu_constantTable, - sha256_cpu_constantTable, - sizeof(uint32_t) * 64 ); - - // Startvektor - cudaMemcpyToSymbol( sha256coin_gpu_register, - sha256_cpu_hashTable, - sizeof(uint32_t) * 8 ); - // setze register + // TODO: almost entirely pre-initialised with zeros. + // there is surely optimisation potential in the GPU kernel here, + // since there is no real need to compute with zeros. uint32_t groestl_state_init[32]; memset(groestl_state_init, 0, sizeof(uint32_t) * 32); groestl_state_init[31] = 0x20000; diff --git a/cuda_hefty1.cu b/cuda_hefty1.cu index 52637d3..f1333d3 100644 --- a/cuda_hefty1.cu +++ b/cuda_hefty1.cu @@ -5,9 +5,6 @@ #include #include -#define USE_SHARED 0 -#define W_ALIGNMENT 65 - // Folgende Definitionen später durch header ersetzen typedef unsigned int uint32_t; typedef unsigned char uint8_t; diff --git a/groestlcoin.cpp b/groestlcoin.cpp index 0a3dcf2..b117ee5 100644 --- a/groestlcoin.cpp +++ b/groestlcoin.cpp @@ -136,12 +136,14 @@ extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t uint32_t tmpHash[8]; endiandata[19] = SWAP32(foundNounce); groestlhash(tmpHash, endiandata); - if (((tmpHash[7]&0xFFFFFF00)==0) && + if (tmpHash[7] <= Htarg && fulltest(tmpHash, ptarget)) { pdata[19] = foundNounce; *hashes_done = foundNounce - start_nonce; free(outputHash); return true; + } else { + applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNounce); } foundNounce = 0xffffffff;