diff --git a/Makefile.am b/Makefile.am
index ca58d96..c523ffa 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -32,13 +32,6 @@ ccminer_LDFLAGS = $(PTHREAD_FLAGS) @CUDA_LDFLAGS@
ccminer_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ @CUDA_LIBS@ @OPENMP_CFLAGS@ @LIBS@
ccminer_CPPFLAGS = -msse2 @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES) -DSCRYPT_KECCAK512 -DSCRYPT_CHACHA -DSCRYPT_CHOOSE_COMPILETIME
+# Build one fat binary covering compute capabilities 1.0, 2.0, 3.0 and 3.5 (one -gencode per architecture).
.cu.o:
- $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_35 --maxrregcount=124 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
-
-## Thrust needs Compute 2.0 minimum
-#heavy.o: heavy.cu
-# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
-#
-#cuda_hefty1.o: cuda_hefty1.cu
-# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
-
+ $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -gencode=arch=compute_10,code=\"sm_10,compute_10\" -gencode=arch=compute_20,code=\"sm_20,compute_20\" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
diff --git a/Makefile.in b/Makefile.in
index f4a36fa..0925b1f 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -1033,14 +1033,9 @@ uninstall-am: uninstall-binPROGRAMS
uninstall uninstall-am uninstall-binPROGRAMS
+# Build one fat binary covering compute capabilities 1.0, 2.0, 3.0 and 3.5 (one -gencode per architecture).
.cu.o:
- $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_35 --maxrregcount=124 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
-
-#heavy.o: heavy.cu
-# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
-#
-#cuda_hefty1.o: cuda_hefty1.cu
-# $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -arch=compute_20 --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
+ $(NVCC) @CFLAGS@ -Xptxas "-abi=no -v" -gencode=arch=compute_10,code=\"sm_10,compute_10\" -gencode=arch=compute_20,code=\"sm_20,compute_20\" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=63 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
diff --git a/README.txt b/README.txt
index fe4b9ac..31dbac5 100644
--- a/README.txt
+++ b/README.txt
@@ -1,5 +1,5 @@
-ccMiner release 0.3 (Mar 23th 2014) - Groestlcoin Release
+ccMiner release 0.4 (Mar 24th 2014) - Groestlcoin Pool Release
-------------------------------------------------------------
***************************************************************
@@ -107,7 +107,13 @@ from your old clunkers.
>>> RELEASE HISTORY <<<
- Match, 23 2014 added Groestlcoin support. stratum status unknown
+ March, 24 2014 fixed Groestl pool support
+
+ went back to Compute 1.x for cuda_hefty1.cu kernel by
+ default after numerous reports of ccminer v0.2/v0.3
+ not working with HeavyCoin for some people.
+
+ March, 23 2014 added Groestlcoin support. stratum status unknown
(the only pool is currently down for fixing issues)
March, 21 2014 use of shared memory in Fugue256 kernel boosts hash rates
diff --git a/ccminer.vcxproj b/ccminer.vcxproj
index e12cfb2..029fceb 100644
--- a/ccminer.vcxproj
+++ b/ccminer.vcxproj
@@ -277,7 +277,16 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"
-
+
+ compute_10,sm_10
+ compute_10,sm_10
+ 124
+ 124
+ compute_10,sm_10
+ 124
+ compute_10,sm_10
+ 124
+
diff --git a/configure b/configure
index cb6956a..c565c17 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.68 for ccminer 2014.03.23.
+# Generated by GNU Autoconf 2.68 for ccminer 2014.03.24.
#
#
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -557,8 +557,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='ccminer'
PACKAGE_TARNAME='ccminer'
-PACKAGE_VERSION='2014.03.23'
-PACKAGE_STRING='ccminer 2014.03.23'
+PACKAGE_VERSION='2014.03.24'
+PACKAGE_STRING='ccminer 2014.03.24'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1297,7 +1297,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures ccminer 2014.03.23 to adapt to many kinds of systems.
+\`configure' configures ccminer 2014.03.24 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1368,7 +1368,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of ccminer 2014.03.23:";;
+ short | recursive ) echo "Configuration of ccminer 2014.03.24:";;
esac
cat <<\_ACEOF
@@ -1469,7 +1469,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-ccminer configure 2014.03.23
+ccminer configure 2014.03.24
generated by GNU Autoconf 2.68
Copyright (C) 2010 Free Software Foundation, Inc.
@@ -1972,7 +1972,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by ccminer $as_me 2014.03.23, which was
+It was created by ccminer $as_me 2014.03.24, which was
generated by GNU Autoconf 2.68. Invocation command line was
$ $0 $@
@@ -2901,7 +2901,7 @@ fi
# Define the identity of the package.
PACKAGE='ccminer'
- VERSION='2014.03.23'
+ VERSION='2014.03.24'
cat >>confdefs.h <<_ACEOF
@@ -7118,7 +7118,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by ccminer $as_me 2014.03.23, which was
+This file was extended by ccminer $as_me 2014.03.24, which was
generated by GNU Autoconf 2.68. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -7184,7 +7184,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-ccminer config.status 2014.03.23
+ccminer config.status 2014.03.24
configured by $0, generated by GNU Autoconf 2.68,
with options \\"\$ac_cs_config\\"
diff --git a/configure.ac b/configure.ac
index 0d75c39..18063f9 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,4 +1,4 @@
-AC_INIT([ccminer], [2014.03.23])
+AC_INIT([ccminer], [2014.03.24])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM
diff --git a/cpu-miner.c b/cpu-miner.c
index 3877f95..e88b79e 100644
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -669,7 +669,7 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
if (opt_algo == ALGO_HEAVY)
heavycoin_hash(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
else
- if (opt_algo == ALGO_FUGUE256)
+ if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL)
SHA256((unsigned char*)sctx->job.coinbase, sctx->job.coinbase_size, (unsigned char*)merkle_root);
else
sha256d(merkle_root, sctx->job.coinbase, (int)sctx->job.coinbase_size);
@@ -719,7 +719,7 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work)
free(xnonce2str);
}
- if (opt_algo == ALGO_FUGUE256)
+ if (opt_algo == ALGO_FUGUE256 || opt_algo == ALGO_GROESTL)
diff_to_target(work->target, sctx->job.diff / 256.0);
else
diff_to_target(work->target, sctx->job.diff);
@@ -1346,7 +1346,7 @@ static void signal_handler(int sig)
}
#endif
-#define PROGRAM_VERSION "0.2"
+#define PROGRAM_VERSION "0.4"
int main(int argc, char *argv[])
{
struct thr_info *thr;
diff --git a/cpuminer-config.h b/cpuminer-config.h
index 9196f3e..50914ab 100644
--- a/cpuminer-config.h
+++ b/cpuminer-config.h
@@ -152,7 +152,7 @@
#define PACKAGE_NAME "ccminer"
/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "ccminer 2014.03.23"
+#define PACKAGE_STRING "ccminer 2014.03.24"
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
@@ -161,7 +161,7 @@
#undef PACKAGE_URL
/* Define to the version of this package. */
-#define PACKAGE_VERSION "2014.03.23"
+#define PACKAGE_VERSION "2014.03.24"
/* If using the C implementation of alloca, define if you know the
direction of stack growth for your system; otherwise it will be
diff --git a/cuda_groestlcoin.cu b/cuda_groestlcoin.cu
index 606ae9f..442b1dc 100644
--- a/cuda_groestlcoin.cu
+++ b/cuda_groestlcoin.cu
@@ -82,8 +82,6 @@ extern uint32_t T2up_cpu[];
extern uint32_t T2dn_cpu[];
extern uint32_t T3up_cpu[];
extern uint32_t T3dn_cpu[];
-extern uint32_t sha256_cpu_hashTable[];
-extern uint32_t sha256_cpu_constantTable[];
#define S(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
#define R(x, n) ((x) >> (n))
@@ -212,15 +210,14 @@ __global__ void
// GROESTL
uint32_t message[32];
uint32_t state[32];
-
- // SHA
- // jeder thread in diesem Block bekommt sein eigenes W Array im Shared memory
uint32_t g[32];
#pragma unroll 32
for(int k=0;k<32;k++)
{
+ // TODO: the zero pre-initialization does not necessarily have to be read
+ // from constant memory. That is a waste of bandwidth.
state[k] = groestlcoin_gpu_state[k];
message[k] = groestlcoin_gpu_msg[k];
}
@@ -230,12 +227,12 @@ __global__ void
#pragma unroll 32
for(int u=0;u<32;u++)
- g[u] = message[u] ^ state[u];
+ g[u] = message[u] ^ state[u]; // TODO: state is 0 almost everywhere.
// Perm
#if USE_SHARED
- groestlcoin_perm_P(g, mixtabs);
- groestlcoin_perm_Q(message, mixtabs);
+ groestlcoin_perm_P(g, mixtabs); // TODO: g[] is almost exactly equal to message[]
+ groestlcoin_perm_Q(message, mixtabs); // can this be exploited?
#else
groestlcoin_perm_P(g, NULL);
groestlcoin_perm_Q(message, NULL);
@@ -244,6 +241,8 @@ __global__ void
#pragma unroll 32
for(int u=0;u<32;u++)
{
+ // TODO: could the xor with g[u] perhaps be moved up to right after the groestlcoin_perm_P call,
+ // which might reduce register usage?
state[u] ^= g[u] ^ message[u];
g[u] = state[u];
}
@@ -373,17 +372,10 @@ __host__ void groestlcoin_cpu_init(int thr_id, int threads)
texDef(t3up1, d_T3up, T3up_cpu, sizeof(uint32_t)*256);
texDef(t3dn1, d_T3dn, T3dn_cpu, sizeof(uint32_t)*256);
- // Kopiere die Hash-Tabellen in den GPU-Speicher
- cudaMemcpyToSymbol( sha256coin_gpu_constantTable,
- sha256_cpu_constantTable,
- sizeof(uint32_t) * 64 );
-
- // Startvektor
- cudaMemcpyToSymbol( sha256coin_gpu_register,
- sha256_cpu_hashTable,
- sizeof(uint32_t) * 8 );
-
// setze register
+ // TODO: the state is almost entirely pre-initialized with zeros.
+ // there is optimization potential in the GPU kernel here,
+ // since there is no real need to compute with zeros.
uint32_t groestl_state_init[32];
memset(groestl_state_init, 0, sizeof(uint32_t) * 32);
groestl_state_init[31] = 0x20000;
diff --git a/cuda_hefty1.cu b/cuda_hefty1.cu
index 52637d3..f1333d3 100644
--- a/cuda_hefty1.cu
+++ b/cuda_hefty1.cu
@@ -5,9 +5,6 @@
#include
#include
-#define USE_SHARED 0
-#define W_ALIGNMENT 65
-
// Folgende Definitionen später durch header ersetzen
typedef unsigned int uint32_t;
typedef unsigned char uint8_t;
diff --git a/groestlcoin.cpp b/groestlcoin.cpp
index 0a3dcf2..b117ee5 100644
--- a/groestlcoin.cpp
+++ b/groestlcoin.cpp
@@ -136,12 +136,14 @@ extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t
uint32_t tmpHash[8];
endiandata[19] = SWAP32(foundNounce);
groestlhash(tmpHash, endiandata);
- if (((tmpHash[7]&0xFFFFFF00)==0) &&
+ if (tmpHash[7] <= Htarg &&
fulltest(tmpHash, ptarget)) {
pdata[19] = foundNounce;
*hashes_done = foundNounce - start_nonce;
free(outputHash);
return true;
+ } else {
+ applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU!", thr_id, foundNounce);
}
foundNounce = 0xffffffff;