various small changes

heavy: reduce by 256 threads default intensity to all -i 20
cuda: put static thread init bools outside the code (made once)
api: fix nvml header to build without
10 years ago · 8ad180cc70
30 changed files with 76 additions and 79 deletions
--- a/JHA/jackpotcoin.cu
+++ b/JHA/jackpotcoin.cu
@ -83,6 +83,8 @@ extern "C" unsigned int jackpothash(void *state, const void *input)
    return round;
 }
 static bool init[8] = { 0 };
 extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
    const uint32_t *ptarget, uint32_t max_nonce,
    unsigned long *hashes_done)
@ -95,7 +97,6 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
 	int throughput = opt_work_size ? opt_work_size : (1 << 20); // 256*4096
 	throughput = min(throughput, (int)(max_nonce - first_nonce));
 	static bool init[8] = {0,0,0,0,0,0,0,0};
 	if (!init[thr_id])
 	{
 		cudaSetDevice(device_map[thr_id]);
--- a/Makefile.am
+++ b/Makefile.am
@ -19,7 +19,7 @@ ccminer_SOURCES		= elist.h miner.h compat.h \
 			  compat/sys/time.h compat/getopt/getopt.h \
 			  crc32.c hefty1.c scrypt.c \
 			  ccminer.cpp util.cpp \
-			  api.cpp hashlog.cpp stats.cpp sysinfos.cpp cuda.cpp \
+			  api.cpp hashlog.cpp nvml.cpp stats.cpp sysinfos.cpp cuda.cpp \
 			  heavy/heavy.cu \
 			  heavy/cuda_blake512.cu heavy/cuda_blake512.h \
 			  heavy/cuda_combine.cu heavy/cuda_combine.h \
@ -53,7 +53,6 @@ ccminer_SOURCES		= elist.h miner.h compat.h \
 			  x11/s3.cu
 if HAVE_NVML
 ccminer_SOURCES += nvml.cpp
 nvml_defs = -DUSE_WRAPNVML
 nvml_libs = -ldl
 endif
--- a/api.cpp
+++ b/api.cpp
@ -31,13 +31,9 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 #include "compat.h"
 #include "miner.h"
 #ifdef USE_WRAPNVML
 #include "nvml.h"
 #endif
 #ifndef WIN32
 # include <errno.h>
@ -83,18 +79,11 @@ struct IP4ACCESS {
 static int ips = 1;
 static struct IP4ACCESS *ipaccess = NULL;
-// Big enough for largest API request
+#define MYBUFSIZ       16384
-//  though a PC with 100s of CPUs may exceed the size ...
+#define SOCK_REC_BUFSZ 256
-// Current code assumes it can socket send this size also
+#define QUEUE          10
 #define MYBUFSIZ	16384
 #define SOCK_REC_BUFSZ  256
 // Socket is on 127.0.0.1
 #define QUEUE	10
 #define ALLIP4 "0.0.0.0"
 #define ALLIP4         "0.0.0.0"
 static const char *localaddr = "127.0.0.1";
 static const char *UNAVAILABLE = " - API will not be available";
 static char *buffer = NULL;
@ -106,7 +95,6 @@ extern int opt_api_listen; /* port */
 extern uint32_t accepted_count;
 extern uint32_t rejected_count;
 extern int num_cpus;
 extern char driver_version[32];
 extern struct stratum_ctx stratum;
 extern char* rpc_user;
@ -115,6 +103,9 @@ extern float cpu_temp(int);
 extern uint32_t cpu_clock(int);
 // cuda.cpp
 int cuda_num_devices();
 int cuda_gpu_clocks(struct cgpu_info *gpu);
 char driver_version[32] = { 0 };
 /***************************************************************/
@ -132,7 +123,7 @@ static void gpustatus(int thr_id)
 		cgpu->gpu_temp = gpu_temp(cgpu);
 		cgpu->gpu_fan = gpu_fanpercent(cgpu);
 #endif
-		gpu_clocks(cgpu);
+		cuda_gpu_clocks(cgpu);
 		// todo: can be 0 if set by algo (auto)
 		if (opt_intensity == 0 && opt_work_size) {
@ -260,7 +251,7 @@ static void gpuhwinfos(int gpu_id)
 	gpu_info(cgpu);
 #endif
-	gpu_clocks(cgpu);
+	cuda_gpu_clocks(cgpu);
 	memset(pstate, 0, sizeof(pstate));
 	if (cgpu->gpu_pstate != -1)
--- a/blake32.cu
+++ b/blake32.cu
@ -385,11 +385,12 @@ void blake256_cpu_setBlock_16(uint32_t *penddata, const uint32_t *midstate, cons
 }
 #endif
 static bool init[8] = { 0 };
 extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
 	uint32_t max_nonce, unsigned long *hashes_done, int8_t blakerounds=14)
 {
 	const uint32_t first_nonce = pdata[19];
 	static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
 	uint64_t targetHigh = ((uint64_t*)ptarget)[3];
 	uint32_t _ALIGN(64) endiandata[20];
 #if PRECALC64
--- a/ccminer.cpp
+++ b/ccminer.cpp
@ -61,8 +61,8 @@ void cuda_devicenames();
 void cuda_devicereset();
 int cuda_finddevice(char *name);
 #ifdef USE_WRAPNVML
 #include "nvml.h"
 #ifdef USE_WRAPNVML
 wrap_nvml_handle *hnvml = NULL;
 #endif
--- a/configure.ac
+++ b/configure.ac
@ -1,4 +1,4 @@
-AC_INIT([ccminer], [1.5-git])
+AC_INIT([ccminer], [1.5.1-git])
 AC_PREREQ([2.59c])
 AC_CANONICAL_SYSTEM
--- a/cpuminer-config.h
+++ b/cpuminer-config.h
@ -156,7 +156,7 @@
 #define PACKAGE_NAME "ccminer"
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "ccminer 1.5.0"
+#define PACKAGE_STRING "ccminer 1.5.1-git"
 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "ccminer"
@ -165,7 +165,7 @@
 #define PACKAGE_URL ""
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "1.5.0"
+#define PACKAGE_VERSION "1.5.1-git"
 /* If using the C implementation of alloca, define if you know the
   direction of stack growth for your system; otherwise it will be
@ -188,7 +188,7 @@
 #define USE_XOP 1
 /* Version number of package */
-#define VERSION "1.5.0"
+#define VERSION "1.5.1-git"
 /* Define curl_free() as free() if our version of curl lacks curl_free. */
 /* #undef curl_free */
--- a/cuda.cpp
+++ b/cuda.cpp
@ -145,6 +145,18 @@ cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)
 	return result;
 }
 // Fill the clock and memory fields of `gpu` from the CUDA device properties.
 //
 // Queries cudaGetDeviceProperties() for the device index stored in
 // gpu->gpu_id and copies three values into the cgpu_info structure without
 // any conversion:
 //   gpu_clock    <- props.clockRate
 //   gpu_memclock <- props.memoryClockRate
 //   gpu_mem      <- props.totalGlobalMem
 // NOTE(review): the CUDA runtime documents the two clock rates in kHz and
 // totalGlobalMem in bytes -- confirm consumers of these fields expect that.
 //
 // Returns 0 when the property query succeeds, -1 otherwise; on failure the
 // fields of `gpu` are left unmodified.
 int cuda_gpu_clocks(struct cgpu_info *gpu)
 {
 	cudaDeviceProp props;
 	// Only touch the output structure if the runtime call reported success.
 	if (cudaGetDeviceProperties(&props, gpu->gpu_id) == cudaSuccess) {
 		gpu->gpu_clock = props.clockRate;
 		gpu->gpu_memclock = props.memoryClockRate;
 		gpu->gpu_mem = props.totalGlobalMem;
 		return 0;
 	}
 	return -1;
 }
 void cudaReportHardwareFailure(int thr_id, cudaError_t err, const char* func)
 {
 	struct cgpu_info *gpu = &thr_info[thr_id].gpu;
--- a/cuda_nist5.cu
+++ b/cuda_nist5.cu
@ -65,6 +65,8 @@ extern "C" void nist5hash(void *state, const void *input)
    memcpy(state, hash, 32);
 }
 static bool init[8] = { 0 };
 extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata,
    const uint32_t *ptarget, uint32_t max_nonce,
    unsigned long *hashes_done)
@ -77,18 +79,19 @@ extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata,
 	int throughput = opt_work_size ? opt_work_size : (1 << 20); // 256*4096
 	throughput = min(throughput, (int) (max_nonce - first_nonce));
 	static bool init[8] = {0,0,0,0,0,0,0,0};
 	if (!init[thr_id])
 	{
 		cudaSetDevice(device_map[thr_id]);
 		// Konstanten kopieren, Speicher belegen
 		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
 		quark_blake512_cpu_init(thr_id, throughput);
 		quark_groestl512_cpu_init(thr_id, throughput);
 		quark_jh512_cpu_init(thr_id, throughput);
 		quark_keccak512_cpu_init(thr_id, throughput);
 		quark_skein512_cpu_init(thr_id, throughput);
 		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
 		cuda_check_cpu_init(thr_id, throughput);
 		init[thr_id] = true;
 	}
--- a/fuguecoin.cpp
+++ b/fuguecoin.cpp
@ -20,6 +20,8 @@ sph_fugue256_context  ctx_fugue_const[8];
    ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u)   | \
      (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
 static bool init[8] = { 0 };
 extern "C" int scanhash_fugue256(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
 	uint32_t max_nonce, unsigned long *hashes_done)
 {
@ -32,7 +34,6 @@ extern "C" int scanhash_fugue256(int thr_id, uint32_t *pdata, const uint32_t *pt
 		((uint32_t*)ptarget)[7] = 0xf;
 	// init
 	static bool init[8] = { false, false, false, false, false, false, false, false };
 	if(!init[thr_id])
 	{
 		fugue256_cpu_init(thr_id, throughPut);
--- a/groestlcoin.cpp
+++ b/groestlcoin.cpp
@ -58,7 +58,7 @@ extern "C" void groestlhash(void *state, const void *input)
    memcpy(state, hashB, 32);
 }
-extern bool opt_benchmark;
+static bool init[8] = { 0 };
 extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
    uint32_t max_nonce, unsigned long *hashes_done)
@ -73,7 +73,6 @@ extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t
        ((uint32_t*)ptarget)[7] = 0x000000ff;
    // init
    static bool init[8] = { false, false, false, false, false, false, false, false };
    if(!init[thr_id])
    {
        groestlcoin_cpu_init(thr_id, throughPut);
--- a/heavy/heavy.cu
+++ b/heavy/heavy.cu
@ -134,9 +134,9 @@ int scanhash_heavy(int thr_id, uint32_t *pdata,
    const uint32_t *ptarget, uint32_t max_nonce,
    unsigned long *hashes_done, uint32_t maxvote, int blocklen)
 {
-    const uint32_t first_nonce = pdata[19]; /* to check */
+    const uint32_t first_nonce = pdata[19];
    // CUDA will process thousands of threads.
-    int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*2048
+    int throughput = opt_work_size ? opt_work_size : (1 << 19) - 256; // 256*2048
    throughput = min(throughput, (int)(max_nonce - first_nonce));
    int rc = 0;
--- a/keccak/keccak256.cu
+++ b/keccak/keccak256.cu
@ -34,6 +34,8 @@ extern "C" void keccak256_hash(void *state, const void *input)
 	memcpy(state, hash, 32);
 }
 static bool init[8] = { 0 };
 extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done)
@ -46,7 +48,6 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
 	int throughput = opt_work_size ? opt_work_size : (1 << 21); // 256*256*8*4
 	throughput = min(throughput, (int)(max_nonce - first_nonce));
 	static bool init[8] = {0,0,0,0,0,0,0,0};
 	if (!init[thr_id]) {
 		cudaSetDevice(device_map[thr_id]);
--- a/myriadgroestl.cpp
+++ b/myriadgroestl.cpp
@ -32,7 +32,7 @@ extern "C" void myriadhash(void *state, const void *input)
    memcpy(state, hashB, 32);
 }
-extern bool opt_benchmark;
+static bool init[8] = { 0 };
 extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
 	uint32_t max_nonce, unsigned long *hashes_done)
@ -51,7 +51,6 @@ extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptar
 		((uint32_t*)ptarget)[7] = 0x0000ff;
 	// init
 	static bool init[8] = { false, false, false, false, false, false, false, false };
 	if(!init[thr_id])
 	{
 #if BIG_DEBUG
--- a/nvml.cpp
+++ b/nvml.cpp
@ -23,19 +23,16 @@
 #endif
 #include "miner.h"
 #include "nvml.h"
 #include "cuda_runtime.h"
 // cuda.cpp
 int cuda_num_devices();
 // geforce driver version
 char driver_version[32] = { 0 };
 #ifdef USE_WRAPNVML
 #include "nvml.h"
 extern wrap_nvml_handle *hnvml;
 extern char driver_version[32];
 static uint32_t device_bus_ids[8] = { 0 };
@ -770,15 +767,3 @@ int gpu_info(struct cgpu_info *gpu)
 }
 #endif /* USE_WRAPNVML */
 // Fill the clock and memory fields of `gpu` from the CUDA device properties.
 //
 // Calls cudaGetDeviceProperties() for the device index in gpu->gpu_id and,
 // on success, stores props.clockRate, props.memoryClockRate and
 // props.totalGlobalMem (unconverted) into gpu_clock, gpu_memclock and
 // gpu_mem respectively.
 //
 // Returns 0 on success, -1 if the property query does not return
 // cudaSuccess (in which case `gpu` is not modified).
 int gpu_clocks(struct cgpu_info *gpu)
 {
 	cudaDeviceProp props;
 	// Only touch the output structure if the runtime call reported success.
 	if (cudaGetDeviceProperties(&props, gpu->gpu_id) == cudaSuccess) {
 		gpu->gpu_clock = props.clockRate;
 		gpu->gpu_memclock = props.memoryClockRate;
 		gpu->gpu_mem = props.totalGlobalMem;
 		return 0;
 	}
 	return -1;
 }
--- a/nvml.h
+++ b/nvml.h
@ -13,6 +13,9 @@
 * John E. Stone - john.stone@gmail.com
 *
 */
 #ifdef USE_WRAPNVML
 #include "miner.h"
 /*
 * Ugly hacks to avoid dependencies on the real nvml.h until it starts
@ -130,17 +133,8 @@ int wrap_nvml_get_power_usage(wrap_nvml_handle *nvmlh,
                              int gpuindex,
                              unsigned int *milliwatts);
 /* nvapi functions */
 #ifdef WIN32
 int wrap_nvapi_init();
 #endif
 /* api functions */
 #include "miner.h"
 #ifdef USE_WRAPNVML
 int gpu_fanpercent(struct cgpu_info *gpu);
 float gpu_temp(struct cgpu_info *gpu);
 unsigned int gpu_power(struct cgpu_info *gpu);
@ -151,7 +145,9 @@ int gpu_busid(struct cgpu_info *gpu);
 /* pid/vid, sn and bios rev */
 int gpu_info(struct cgpu_info *gpu);
 /* nvapi functions */
 #ifdef WIN32
 int wrap_nvapi_init();
 #endif
-// cuda api based
+#endif /* USE_WRAPNVML */
 int gpu_clocks(struct cgpu_info *gpu);
--- a/pentablake.cu
+++ b/pentablake.cu
@ -486,11 +486,12 @@ void pentablake_cpu_setBlock_80(uint32_t *pdata, const uint32_t *ptarget)
 	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 32, 0, cudaMemcpyHostToDevice));
 }
 static bool init[8] = { 0 };
 extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
 	uint32_t max_nonce, unsigned long *hashes_done)
 {
 	const uint32_t first_nonce = pdata[19];
 	static bool init[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
 	uint32_t endiandata[20];
 	int rc = 0;
 	int throughput = opt_work_size ? opt_work_size : (128 * 2560); // 18.5
--- a/quark/animecoin.cu
+++ b/quark/animecoin.cu
@ -158,12 +158,13 @@ struct HashPredicate
 };
 */
 static bool init[8] = { 0 };
 extern "C" int scanhash_anime(int thr_id, uint32_t *pdata,
    const uint32_t *ptarget, uint32_t max_nonce,
    unsigned long *hashes_done)
 {
 	const uint32_t first_nonce = pdata[19];
 	static bool init[8] = { 0 };
 	int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*2048
 	throughput = min(throughput, (int)(max_nonce - first_nonce));
--- a/quark/quarkcoin.cu
+++ b/quark/quarkcoin.cu
@ -128,12 +128,13 @@ extern "C" void quarkhash(void *state, const void *input)
    memcpy(state, hash, 32);
 }
 static bool init[8] = { 0 };
 extern "C" int scanhash_quark(int thr_id, uint32_t *pdata,
    const uint32_t *ptarget, uint32_t max_nonce,
    unsigned long *hashes_done)
 {
 	const uint32_t first_nonce = pdata[19];
 	static bool init[8] = { 0 };
 	int throughput = opt_work_size ? opt_work_size : (1 << 20); // 256*4096
 	throughput = min(throughput, (int)(max_nonce - first_nonce));
--- a/qubit/deep.cu
+++ b/qubit/deep.cu
@ -52,13 +52,13 @@ extern "C" void deephash(void *state, const void *input)
 	memcpy(state, hash, 32);
 }
 static bool init[8] = { 0 };
 extern "C" int scanhash_deep(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done)
 {
 	const uint32_t first_nonce = pdata[19];
 	static bool init[8] = {0,0,0,0,0,0,0,0};
 	uint32_t endiandata[20];
 	int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8
 	throughput = min(throughput, (int)(max_nonce - first_nonce));
--- a/qubit/doom.cu
+++ b/qubit/doom.cu
@ -32,13 +32,13 @@ extern void doomhash(void *state, const void *input)
 	memcpy(state, hash, 32);
 }
 static bool init[8] = { 0 };
 extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done)
 {
 	const uint32_t first_nonce = pdata[19];
 	static bool init[8] = {0,0,0,0,0,0,0,0};
 	uint32_t endiandata[20];
 	int throughput = opt_work_size ? opt_work_size : (1 << 22); // 256*256*8*8
 	throughput = min(throughput, (int)(max_nonce - first_nonce));
--- a/qubit/qubit.cu
+++ b/qubit/qubit.cu
@ -72,11 +72,12 @@ extern "C" void qubithash(void *state, const void *input)
 	memcpy(state, hash, 32);
 }
 static bool init[8] = { 0 };
 extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done)
 {
 	static bool init[8] = {0,0,0,0,0,0,0,0};
 	uint32_t endiandata[20];
 	const uint32_t first_nonce = pdata[19];
 	int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8
--- a/x11/fresh.cu
+++ b/x11/fresh.cu
@ -68,12 +68,13 @@ extern "C" void fresh_hash(void *state, const void *input)
 	memcpy(state, hash, 32);
 }
 static bool init[8] = { 0 };
 extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done)
 {
 	const uint32_t first_nonce = pdata[19];
 	static bool init[8] = {0,0,0,0,0,0,0,0};
 	uint32_t endiandata[20];
 	int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8;
--- a/x11/s3.cu
+++ b/x11/s3.cu
@ -49,13 +49,14 @@ extern "C" void s3hash(void *output, const void *input)
 	memcpy(output, hash, 32);
 }
 static bool init[8] = { 0 };
 /* Main S3 entry point */
 extern "C" int scanhash_s3(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done)
 {
 	const uint32_t first_nonce = pdata[19];
 	static bool init[8] = { 0 };
 	int intensity = 20; // 256*256*8*2;
 #ifdef WIN32
 	// reduce by one the intensity on windows
--- a/x11/x11.cu
+++ b/x11/x11.cu
@ -129,12 +129,13 @@ extern "C" void x11hash(void *output, const void *input)
 	memcpy(output, hash, 32);
 }
 static bool init[8] = { 0 };
 extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
    const uint32_t *ptarget, uint32_t max_nonce,
    unsigned long *hashes_done)
 {
 	const uint32_t first_nonce = pdata[19];
 	static bool init[8] = { 0 };
 	int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19;
 	int throughput = opt_work_size ? opt_work_size : (1 << intensity); // 20=256*256*16;
 	throughput = min(throughput, (int)(max_nonce - first_nonce));
--- a/x13/x13.cu
+++ b/x13/x13.cu
@ -144,13 +144,13 @@ extern "C" void x13hash(void *output, const void *input)
 	memcpy(output, hash, 32);
 }
 static bool init[8] = { 0 };
 extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
    const uint32_t *ptarget, uint32_t max_nonce,
    unsigned long *hashes_done)
 {
 	const uint32_t first_nonce = pdata[19];
 	static bool init[8] = { 0 };
 	int intensity = 19; // (device_sm[device_map[thr_id]] > 500 && !is_windows()) ? 20 : 19;
 	int throughput = opt_work_size ? opt_work_size : (1 << intensity); // 19=256*256*8;
 	throughput = min(throughput, (int)(max_nonce - first_nonce));
--- a/x15/whirlpool.cu
+++ b/x15/whirlpool.cu
@ -49,12 +49,13 @@ extern "C" void wcoinhash(void *state, const void *input)
 	memcpy(state, hash, 32);
 }
 static bool init[8] = { 0 };
 extern "C" int scanhash_whc(int thr_id, uint32_t *pdata,
    const uint32_t *ptarget, uint32_t max_nonce,
    unsigned long *hashes_done)
 {
 	const uint32_t first_nonce = pdata[19];
 	static bool init[8] = {0,0,0,0,0,0,0,0};
 	uint32_t endiandata[20];
 	int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8;
 	throughput = min(throughput, (int)(max_nonce - first_nonce));
--- a/x15/x14.cu
+++ b/x15/x14.cu
@ -155,13 +155,13 @@ extern "C" void x14hash(void *output, const void *input)
 	memcpy(output, hash, 32);
 }
 static bool init[8] = { 0 };
 extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done)
 {
 	const uint32_t first_nonce = pdata[19];
 	static bool init[8] = { 0 };
 	uint32_t endiandata[20];
 	int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8;
 	throughput = min(throughput, (int)(max_nonce - first_nonce));
--- a/x15/x15.cu
+++ b/x15/x15.cu
@ -165,12 +165,13 @@ extern "C" void x15hash(void *output, const void *input)
 	memcpy(output, hash, 32);
 }
 static bool init[8] = { 0 };
 extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done)
 {
 	const uint32_t first_nonce = pdata[19];
 	static bool init[8] = { 0 };
 	uint32_t endiandata[20];
 	int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8;
--- a/x17/x17.cu
+++ b/x17/x17.cu
@ -184,13 +184,13 @@ extern "C" void x17hash(void *output, const void *input)
 	memcpy(output, hash, 32);
 }
 static bool init[8] = { 0 };
 extern "C" int scanhash_x17(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
 	unsigned long *hashes_done)
 {
 	const uint32_t first_nonce = pdata[19];
 	static bool init[8] = { 0 };
 	int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8;
 	throughput = min(throughput, (int)(max_nonce - first_nonce));