diff --git a/JHA/cuda_jha_compactionTest.cu b/JHA/cuda_jha_compactionTest.cu index aab71b6..20e73db 100644 --- a/JHA/cuda_jha_compactionTest.cu +++ b/JHA/cuda_jha_compactionTest.cu @@ -10,9 +10,6 @@ static uint32_t *h_numValid[8]; static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen -// aus heavy.cu -extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); - // True/False tester typedef uint32_t(*cuda_compactTestFunction_t)(uint32_t *inpHash); diff --git a/JHA/cuda_jha_keccak512.cu b/JHA/cuda_jha_keccak512.cu index e1a73d8..52ddd1f 100644 --- a/JHA/cuda_jha_keccak512.cu +++ b/JHA/cuda_jha_keccak512.cu @@ -3,9 +3,6 @@ #include "cuda_helper.h" -// aus heavy.cu -extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); - __constant__ uint64_t c_State[25]; __constant__ uint32_t c_PaddedMessage[18]; diff --git a/api.cpp b/api.cpp index 26a6829..a83c7cd 100644 --- a/api.cpp +++ b/api.cpp @@ -103,11 +103,8 @@ static int bye = 0; extern char *opt_api_allow; extern int opt_api_listen; /* port */ -extern uint64_t global_hashrate; extern uint32_t accepted_count; extern uint32_t rejected_count; -extern int device_map[8]; -extern char *device_name[8]; extern int num_cpus; extern char driver_version[32]; extern struct stratum_ctx stratum; @@ -203,7 +200,7 @@ static char *getsummary(char *params) "ALGO=%s;GPUS=%d;KHS=%.2f;ACC=%d;REJ=%d;" "ACCMN=%.3f;DIFF=%.6f;UPTIME=%.0f;TS=%u|", PACKAGE_NAME, PACKAGE_VERSION, APIVERSION, - algo, num_processors, (double)global_hashrate / 1000.0, + algo, active_gpus, (double)global_hashrate / 1000.0, accepted_count, rejected_count, accps, global_diff, uptime, (uint32_t) ts); return buffer; @@ -277,10 +274,10 @@ static void gpuhwinfos(int gpu_id) card = device_name[gpu_id]; - snprintf(buf, sizeof(buf), "GPU=%d;BUS=%hd;CARD=%s;MEM=%lu;" + snprintf(buf, sizeof(buf), "GPU=%d;BUS=%hd;CARD=%s;SM=%u;MEM=%lu;" "TEMP=%.1f;FAN=%d;FREQ=%d;MEMFREQ=%d;PST=%s;" "VID=%hx;PID=%hx;NVML=%d;NVAPI=%d;SN=%s;BIOS=%s|", - gpu_id, cgpu->gpu_bus, card, cgpu->gpu_mem, + gpu_id, cgpu->gpu_bus, card, cgpu->gpu_arch, cgpu->gpu_mem, cgpu->gpu_temp, cgpu->gpu_fan, cgpu->gpu_clock, cgpu->gpu_memclock, pstate, cgpu->gpu_vid, cgpu->gpu_pid, cgpu->nvml_id, cgpu->nvapi_id, cgpu->gpu_sn, cgpu->gpu_desc); @@ -304,12 +301,12 @@ static void syshwinfos() { char buf[256]; - float temp = cpu_temp(0); - uint32_t clock = cpu_clock(0); + int cputc = (int) cpu_temp(0); + uint32_t cpuclk = cpu_clock(0); memset(buf, 0, sizeof(buf)); - snprintf(buf, sizeof(buf), "OS=%s;NVDRIVER=%s;CPUS=%d;CPUTEMP=%.1f;CPUFREQ=%d|", - os_name(), driver_version, num_cpus, temp, clock); + snprintf(buf, sizeof(buf), "OS=%s;NVDRIVER=%s;CPUS=%d;CPUTEMP=%d;CPUFREQ=%d|", + os_name(), driver_version, num_cpus, cputc, cpuclk); strcat(buffer, buf); } @@ -341,9 +338,9 @@ static char *gethistory(char *params) for (int i = 0; i < records; i++) { time_t ts = data[i].tm_stat; p += sprintf(p, "GPU=%d;H=%u;KHS=%.2f;DIFF=%.6f;" - "COUNT=%u;FOUND=%u;TS=%u|", + "COUNT=%u;FOUND=%u;ID=%u;TS=%u|", data[i].gpu_id, data[i].height, data[i].hashrate, data[i].difficulty, - data[i].hashcount, data[i].hashfound, (uint32_t)ts); + data[i].hashcount, data[i].hashfound, data[i].uid, (uint32_t)ts); } return buffer; } diff --git a/blake32.cu b/blake32.cu index 87bd1ca..75e656a 100644 --- a/blake32.cu +++ b/blake32.cu @@ -425,7 +425,7 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt } if (!init[thr_id]) { - if (num_processors > 1) + if (active_gpus > 1) cudaSetDevice(device_map[thr_id]); CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)), 0); CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), 0); diff --git a/ccminer.cpp b/ccminer.cpp index ce68d13..3808c59 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -192,7 +192,6 @@ bool want_stratum = true; bool have_stratum = false; static bool submit_old = false; bool use_syslog = false; -static char* opt_syslog_pfx = (char*) PACKAGE_NAME; bool use_colors = true; static bool opt_background = false; bool opt_quiet = false; @@ -208,10 +207,10 @@ static double opt_difficulty = 1; // CH bool opt_trust_pool = false; uint16_t opt_vote = 9999; int num_cpus; -int num_processors; -int device_map[8] = {0,1,2,3,4,5,6,7}; // CB -char *device_name[8]; // CB -int device_sm[8]; +int active_gpus; +char * device_name[8]; +short device_map[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; +long device_sm[8] = { 0 }; char *rpc_user = NULL; static char *rpc_url; static char *rpc_userpass; @@ -241,8 +240,9 @@ int opt_statsavg = 30; int opt_intensity = 0; uint32_t opt_work_size = 0; /* default */ uint32_t opt_work_adds = 0; - -char *opt_api_allow = (char*) "127.0.0.1"; /* 0.0.0.0 for all ips */ +// strdup on char* to allow a common free() if used +static char* opt_syslog_pfx = strdup(PROGRAM_NAME); +char *opt_api_allow = strdup("127.0.0.1"); /* 0.0.0.0 for all ips */ int opt_api_listen = 4068; /* 0 to disable */ #ifdef HAVE_GETOPT_LONG @@ -409,6 +409,8 @@ void proper_exit(int reason) if (hnvml) wrap_nvml_destroy(hnvml); #endif + free(opt_syslog_pfx); + free(opt_api_allow); exit(reason); } @@ -1671,6 +1673,7 @@ static void parse_arg(int key, char *arg) if (p) { /* ip:port */ if (p - arg > 0) { + free(opt_api_allow); opt_api_allow = strdup(arg); opt_api_allow[p - arg] = '\0'; } @@ -1872,8 +1875,10 @@ static void parse_arg(int key, char *arg) case 1008: applog(LOG_INFO, "Now logging to syslog..."); use_syslog = true; - if (arg && strlen(arg)) + if (arg && strlen(arg)) { + free(opt_syslog_pfx); opt_syslog_pfx = strdup(arg); + } break; case 'd': // CB { @@ -1899,7 +1904,7 @@ static void parse_arg(int key, char *arg) } } // set number of active gpus - num_processors = opt_n_threads; + active_gpus = opt_n_threads; pch = strtok (NULL, ","); } } @@ -2079,7 +2084,7 @@ int main(int argc, char *argv[]) num_cpus = 1; // number of gpus - num_processors = cuda_num_devices(); + active_gpus = cuda_num_devices(); cuda_devicenames(); /* parse command line */ @@ -2133,12 +2138,12 @@ int main(int argc, char *argv[]) SetConsoleCtrlHandler((PHANDLER_ROUTINE)ConsoleHandler, TRUE); #endif - if (num_processors == 0) { + if (active_gpus == 0) { applog(LOG_ERR, "No CUDA devices found! terminating."); exit(1); } if (!opt_n_threads) - opt_n_threads = num_processors; + opt_n_threads = active_gpus; #ifdef HAVE_SYSLOG_H if (use_syslog) @@ -2208,7 +2213,7 @@ int main(int argc, char *argv[]) #ifdef USE_WRAPNVML #ifndef WIN32 - /* nvml is currently not usable on Windows (even for x64) */ + /* nvml is currently not the best choice on Windows (only in x64) */ hnvml = wrap_nvml_create(); if (hnvml) applog(LOG_INFO, "NVML GPU monitoring enabled."); @@ -2241,8 +2246,9 @@ int main(int argc, char *argv[]) thr = &thr_info[i]; thr->id = i; - thr->gpu.gpu_id = device_map[i]; thr->gpu.thr_id = i; + thr->gpu.gpu_id = (uint8_t) device_map[i]; + thr->gpu.gpu_arch = (uint16_t) device_sm[device_map[i]]; thr->q = tq_new(); if (!thr->q) return 1; @@ -2253,9 +2259,9 @@ int main(int argc, char *argv[]) } } - applog(LOG_INFO, "%d miner threads started, " + applog(LOG_INFO, "%d miner thread%s started, " "using '%s' algorithm.", - opt_n_threads, + opt_n_threads, opt_n_threads > 1 ? "s":"", algo_names[opt_algo]); #ifdef WIN32 diff --git a/ccminer.vcxproj b/ccminer.vcxproj index 1cf7791..e4ee1da 100644 --- a/ccminer.vcxproj +++ b/ccminer.vcxproj @@ -172,7 +172,7 @@ false 80 true - true + false compute_30,sm_30;compute_50,sm_50 --ptxas-options="-O2" %(AdditionalOptions) @@ -215,7 +215,7 @@ false 80 true - true + false compute_50,sm_50 @@ -366,7 +366,7 @@ - -Xptxas "-abi=yes -O2" %(AdditionalOptions) + -Xptxas "-abi=yes" %(AdditionalOptions) -Xptxas "-abi=yes" %(AdditionalOptions) @@ -375,7 +375,7 @@ 64 - --ptxas-options="-O2 -dlcm=cg" %(AdditionalOptions) + --ptxas-options="-dlcm=cg" %(AdditionalOptions) true @@ -384,7 +384,7 @@ 80 - --ptxas-options="-O2 -dlcm=cg" %(AdditionalOptions) + --ptxas-options="-dlcm=cg" %(AdditionalOptions) true @@ -402,7 +402,7 @@ false - -Xptxas "-abi=yes -O2" %(AdditionalOptions) + -Xptxas "-abi=yes" %(AdditionalOptions) -Xptxas "-abi=yes" %(AdditionalOptions) diff --git a/cuda.cpp b/cuda.cpp index 780b648..e409ca5 100644 --- a/cuda.cpp +++ b/cuda.cpp @@ -25,10 +25,6 @@ #include "compat.h" // sleep #endif -extern char *device_name[8]; -extern int device_map[8]; -extern int device_sm[8]; - // CUDA Devices on the System int cuda_num_devices() { @@ -74,7 +70,7 @@ void cuda_devicenames() cudaGetDeviceProperties(&props, device_map[i]); device_name[i] = strdup(props.name); - device_sm[i] = props.major * 100 + props.minor * 10; + device_sm[i] = (props.major * 100 + props.minor * 10); } } diff --git a/cuda_helper.h b/cuda_helper.h index d94e72a..f22100e 100644 --- a/cuda_helper.h +++ b/cuda_helper.h @@ -13,8 +13,8 @@ #include -extern int device_map[8]; -extern int device_sm[8]; +extern "C" short device_map[8]; +extern "C" long device_sm[8]; // common functions extern void cuda_check_cpu_init(int thr_id, int threads); diff --git a/fuguecoin.cpp b/fuguecoin.cpp index 3f31a16..ed928c2 100644 --- a/fuguecoin.cpp +++ b/fuguecoin.cpp @@ -13,9 +13,6 @@ extern "C" void my_fugue256(void *cc, const void *data, size_t len); extern "C" void my_fugue256_close(void *cc, void *dst); extern "C" void my_fugue256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst); -extern int device_map[8]; -extern int device_sm[8]; - // vorbereitete Kontexte nach den ersten 80 Bytes sph_fugue256_context ctx_fugue_const[8]; diff --git a/miner.h b/miner.h index 52d538c..90b9d52 100644 --- a/miner.h +++ b/miner.h @@ -386,6 +386,7 @@ struct cgpu_info { uint8_t has_monitoring; float gpu_temp; int gpu_fan; + uint16_t gpu_arch; int gpu_clock; int gpu_memclock; size_t gpu_mem; @@ -410,6 +411,7 @@ struct thr_api { }; struct stats_data { + uint32_t uid; uint32_t tm_stat; uint32_t hashcount; uint32_t height; @@ -454,7 +456,7 @@ extern bool opt_protocol; extern bool opt_tracegpu; extern int opt_intensity; extern int opt_n_threads; -extern int num_processors; +extern int active_gpus; extern int opt_timeout; extern bool want_longpoll; extern bool have_longpoll; @@ -478,6 +480,10 @@ extern uint32_t opt_work_size; extern uint64_t global_hashrate; extern double global_diff; +extern char* device_name[8]; +extern short device_map[8]; +extern long device_sm[8]; + #define CL_N "\x1B[0m" #define CL_RED "\x1B[31m" #define CL_GRN "\x1B[32m" diff --git a/pentablake.cu b/pentablake.cu index 726ba5f..e1e1d96 100644 --- a/pentablake.cu +++ b/pentablake.cu @@ -511,7 +511,7 @@ extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t * ((uint32_t*)ptarget)[7] = 0x000F; if (!init[thr_id]) { - if (num_processors > 1) { + if (active_gpus > 1) { cudaSetDevice(device_map[thr_id]); } CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 64 * throughput)); diff --git a/quark/cuda_bmw512.cu b/quark/cuda_bmw512.cu index f037b04..ce4a773 100644 --- a/quark/cuda_bmw512.cu +++ b/quark/cuda_bmw512.cu @@ -3,9 +3,6 @@ #include "cuda_helper.h" -// aus heavy.cu -extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); - // die Message it Padding zur Berechnung auf der GPU __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding) diff --git a/quark/cuda_jh512.cu b/quark/cuda_jh512.cu index 07453e8..8f62810 100644 --- a/quark/cuda_jh512.cu +++ b/quark/cuda_jh512.cu @@ -1,8 +1,5 @@ #include "cuda_helper.h" -// aus heavy.cu -extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); - typedef struct { uint32_t x[8][4]; /*the 1024-bit state, ( x[i][0] || x[i][1] || x[i][2] || x[i][3] ) is the ith row of the state in the pseudocode*/ uint32_t buffer[16]; /*the 512-bit message block to be hashed;*/ diff --git a/quark/cuda_quark_compactionTest.cu b/quark/cuda_quark_compactionTest.cu index 6a0ec5f..cac7562 100644 --- a/quark/cuda_quark_compactionTest.cu +++ b/quark/cuda_quark_compactionTest.cu @@ -8,10 +8,7 @@ static uint32_t *d_tempBranch1Nonces[8]; static uint32_t *d_numValid[8]; static uint32_t *h_numValid[8]; -static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen - -// aus heavy.cu -extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); +static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen // True/False tester typedef uint32_t(*cuda_compactTestFunction_t)(uint32_t *inpHash); @@ -83,7 +80,7 @@ __global__ void quark_compactTest_gpu_SCAN(uint32_t *data, int width, uint32_t * inpHash = &inpHashes[id<<4]; }else { - // Nonce-Liste verfügbar + // Nonce-Liste verfügbar int nonce = d_validNonceTable[id] - startNounce; inpHash = &inpHashes[nonce<<4]; } @@ -200,7 +197,7 @@ __global__ void quark_compactTest_gpu_SCATTER(uint32_t *sum, uint32_t *outp, cud inpHash = &inpHashes[id<<4]; }else { - // Nonce-Liste verfügbar + // Nonce-Liste verfügbar int nonce = d_validNonceTable[id] - startNounce; actNounce = nonce; inpHash = &inpHashes[nonce<<4]; @@ -335,7 +332,7 @@ __host__ void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t st int order) { // Wenn validNonceTable genutzt wird, dann werden auch nur die Nonces betrachtet, die dort enthalten sind - // "threads" ist in diesem Fall auf die Länge dieses Array's zu setzen! + // "threads" ist in diesem Fall auf die Länge dieses Array's zu setzen! quark_compactTest_cpu_dualCompaction(thr_id, threads, h_numValid[thr_id], d_nonces1, d_nonces2, @@ -351,7 +348,7 @@ __host__ void quark_compactTest_single_false_cpu_hash_64(int thr_id, int threads int order) { // Wenn validNonceTable genutzt wird, dann werden auch nur die Nonces betrachtet, die dort enthalten sind - // "threads" ist in diesem Fall auf die Länge dieses Array's zu setzen! + // "threads" ist in diesem Fall auf die Länge dieses Array's zu setzen! quark_compactTest_cpu_singleCompaction(thr_id, threads, h_numValid[thr_id], d_nonces1, h_QuarkFalseFunction[thr_id], startNounce, inpHashes, d_validNonceTable); diff --git a/quark/cuda_quark_keccak512.cu b/quark/cuda_quark_keccak512.cu index b205100..0ddbc05 100644 --- a/quark/cuda_quark_keccak512.cu +++ b/quark/cuda_quark_keccak512.cu @@ -3,9 +3,6 @@ #include "cuda_helper.h" -// heavy.cu -extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); - #define U32TO64_LE(p) \ (((uint64_t)(*p)) | (((uint64_t)(*(p + 1))) << 32)) diff --git a/stats.cpp b/stats.cpp index 5a17273..6b87b05 100644 --- a/stats.cpp +++ b/stats.cpp @@ -19,10 +19,9 @@ static uint64_t uid = 0; extern uint64_t global_hashrate; extern int opt_statsavg; -extern int device_map[8]; /** - * Store speed per thread (todo: compute vardiff ?) + * Store speed per thread */ void stats_remember_speed(int thr_id, uint32_t hashcount, double hashrate, uint8_t found, uint32_t height) { @@ -38,6 +37,7 @@ void stats_remember_speed(int thr_id, uint32_t hashcount, double hashrate, uint8 return; memset(&data, 0, sizeof(data)); + data.uid = uid; data.gpu_id = gpu; data.thr_id = (uint8_t)thr_id; data.tm_stat = (uint32_t) time(NULL); diff --git a/x11/cuda_x11_cubehash512.cu b/x11/cuda_x11_cubehash512.cu index cd2ccef..0a570dc 100644 --- a/x11/cuda_x11_cubehash512.cu +++ b/x11/cuda_x11_cubehash512.cu @@ -1,8 +1,5 @@ #include "cuda_helper.h" -// aus heavy.cu -extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); - typedef unsigned char BitSequence; #define CUBEHASH_ROUNDS 16 /* this is r for CubeHashr/b */ diff --git a/x13/cuda_x13_fugue512.cu b/x13/cuda_x13_fugue512.cu index 8b4a72b..4360a0f 100644 --- a/x13/cuda_x13_fugue512.cu +++ b/x13/cuda_x13_fugue512.cu @@ -7,9 +7,6 @@ */ #include "cuda_helper.h" -// aus heavy.cu -extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); - /* * X13 kernel implementation. * diff --git a/x15/cuda_x14_shabal512.cu b/x15/cuda_x14_shabal512.cu index 3d55747..60ffb32 100644 --- a/x15/cuda_x14_shabal512.cu +++ b/x15/cuda_x14_shabal512.cu @@ -1,10 +1,8 @@ /* - * Shabal-512 for X14/X15 (STUB) + * Shabal-512 for X14/X15 */ #include "cuda_helper.h" -extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); - /* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */ /* * Shabal implementation. diff --git a/x15/cuda_x15_whirlpool.cu b/x15/cuda_x15_whirlpool.cu index f15133d..3e84818 100644 --- a/x15/cuda_x15_whirlpool.cu +++ b/x15/cuda_x15_whirlpool.cu @@ -11,8 +11,6 @@ #include "cuda_helper.h" -extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); - __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding) __constant__ uint32_t pTarget[8]; diff --git a/x17/cuda_x17_haval512.cu b/x17/cuda_x17_haval512.cu index ba01cc9..ff9912d 100644 --- a/x17/cuda_x17_haval512.cu +++ b/x17/cuda_x17_haval512.cu @@ -48,9 +48,6 @@ #define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) -// in heavy.cu -extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); - static __constant__ uint32_t initVector[8]; static const uint32_t c_initVector[8] = { diff --git a/x17/cuda_x17_sha512.cu b/x17/cuda_x17_sha512.cu index aeb72fd..8ef2a27 100644 --- a/x17/cuda_x17_sha512.cu +++ b/x17/cuda_x17_sha512.cu @@ -47,9 +47,6 @@ #define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) #define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) -// in heavy.cu -extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id); - static __constant__ uint64_t H_512[8]; static const uint64_t H512[8] = {