Browse Source

various extern cleanup + api history uids and gpu SM

uids could be useful to create graphs from history data

Note: please do a clean build after this commit (changes in miner.h)
master
Tanguy Pruvot 10 years ago
parent
commit
6ae28162db
  1. 3
      JHA/cuda_jha_compactionTest.cu
  2. 3
      JHA/cuda_jha_keccak512.cu
  3. 21
      api.cpp
  4. 2
      blake32.cu
  5. 38
      ccminer.cpp
  6. 12
      ccminer.vcxproj
  7. 6
      cuda.cpp
  8. 4
      cuda_helper.h
  9. 3
      fuguecoin.cpp
  10. 8
      miner.h
  11. 2
      pentablake.cu
  12. 3
      quark/cuda_bmw512.cu
  13. 3
      quark/cuda_jh512.cu
  14. 13
      quark/cuda_quark_compactionTest.cu
  15. 3
      quark/cuda_quark_keccak512.cu
  16. 4
      stats.cpp
  17. 3
      x11/cuda_x11_cubehash512.cu
  18. 3
      x13/cuda_x13_fugue512.cu
  19. 4
      x15/cuda_x14_shabal512.cu
  20. 2
      x15/cuda_x15_whirlpool.cu
  21. 3
      x17/cuda_x17_haval512.cu
  22. 3
      x17/cuda_x17_sha512.cu

3
JHA/cuda_jha_compactionTest.cu

@ -10,9 +10,6 @@ static uint32_t *h_numValid[8];
static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
// True/False tester // True/False tester
typedef uint32_t(*cuda_compactTestFunction_t)(uint32_t *inpHash); typedef uint32_t(*cuda_compactTestFunction_t)(uint32_t *inpHash);

3
JHA/cuda_jha_keccak512.cu

@ -3,9 +3,6 @@
#include "cuda_helper.h" #include "cuda_helper.h"
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
__constant__ uint64_t c_State[25]; __constant__ uint64_t c_State[25];
__constant__ uint32_t c_PaddedMessage[18]; __constant__ uint32_t c_PaddedMessage[18];

21
api.cpp

@ -103,11 +103,8 @@ static int bye = 0;
extern char *opt_api_allow; extern char *opt_api_allow;
extern int opt_api_listen; /* port */ extern int opt_api_listen; /* port */
extern uint64_t global_hashrate;
extern uint32_t accepted_count; extern uint32_t accepted_count;
extern uint32_t rejected_count; extern uint32_t rejected_count;
extern int device_map[8];
extern char *device_name[8];
extern int num_cpus; extern int num_cpus;
extern char driver_version[32]; extern char driver_version[32];
extern struct stratum_ctx stratum; extern struct stratum_ctx stratum;
@ -203,7 +200,7 @@ static char *getsummary(char *params)
"ALGO=%s;GPUS=%d;KHS=%.2f;ACC=%d;REJ=%d;" "ALGO=%s;GPUS=%d;KHS=%.2f;ACC=%d;REJ=%d;"
"ACCMN=%.3f;DIFF=%.6f;UPTIME=%.0f;TS=%u|", "ACCMN=%.3f;DIFF=%.6f;UPTIME=%.0f;TS=%u|",
PACKAGE_NAME, PACKAGE_VERSION, APIVERSION, PACKAGE_NAME, PACKAGE_VERSION, APIVERSION,
algo, num_processors, (double)global_hashrate / 1000.0, algo, active_gpus, (double)global_hashrate / 1000.0,
accepted_count, rejected_count, accepted_count, rejected_count,
accps, global_diff, uptime, (uint32_t) ts); accps, global_diff, uptime, (uint32_t) ts);
return buffer; return buffer;
@ -277,10 +274,10 @@ static void gpuhwinfos(int gpu_id)
card = device_name[gpu_id]; card = device_name[gpu_id];
snprintf(buf, sizeof(buf), "GPU=%d;BUS=%hd;CARD=%s;MEM=%lu;" snprintf(buf, sizeof(buf), "GPU=%d;BUS=%hd;CARD=%s;SM=%u;MEM=%lu;"
"TEMP=%.1f;FAN=%d;FREQ=%d;MEMFREQ=%d;PST=%s;" "TEMP=%.1f;FAN=%d;FREQ=%d;MEMFREQ=%d;PST=%s;"
"VID=%hx;PID=%hx;NVML=%d;NVAPI=%d;SN=%s;BIOS=%s|", "VID=%hx;PID=%hx;NVML=%d;NVAPI=%d;SN=%s;BIOS=%s|",
gpu_id, cgpu->gpu_bus, card, cgpu->gpu_mem, gpu_id, cgpu->gpu_bus, card, cgpu->gpu_arch, cgpu->gpu_mem,
cgpu->gpu_temp, cgpu->gpu_fan, cgpu->gpu_clock, cgpu->gpu_memclock, cgpu->gpu_temp, cgpu->gpu_fan, cgpu->gpu_clock, cgpu->gpu_memclock,
pstate, cgpu->gpu_vid, cgpu->gpu_pid, cgpu->nvml_id, cgpu->nvapi_id, pstate, cgpu->gpu_vid, cgpu->gpu_pid, cgpu->nvml_id, cgpu->nvapi_id,
cgpu->gpu_sn, cgpu->gpu_desc); cgpu->gpu_sn, cgpu->gpu_desc);
@ -304,12 +301,12 @@ static void syshwinfos()
{ {
char buf[256]; char buf[256];
float temp = cpu_temp(0); int cputc = (int) cpu_temp(0);
uint32_t clock = cpu_clock(0); uint32_t cpuclk = cpu_clock(0);
memset(buf, 0, sizeof(buf)); memset(buf, 0, sizeof(buf));
snprintf(buf, sizeof(buf), "OS=%s;NVDRIVER=%s;CPUS=%d;CPUTEMP=%.1f;CPUFREQ=%d|", snprintf(buf, sizeof(buf), "OS=%s;NVDRIVER=%s;CPUS=%d;CPUTEMP=%d;CPUFREQ=%d|",
os_name(), driver_version, num_cpus, temp, clock); os_name(), driver_version, num_cpus, cputc, cpuclk);
strcat(buffer, buf); strcat(buffer, buf);
} }
@ -341,9 +338,9 @@ static char *gethistory(char *params)
for (int i = 0; i < records; i++) { for (int i = 0; i < records; i++) {
time_t ts = data[i].tm_stat; time_t ts = data[i].tm_stat;
p += sprintf(p, "GPU=%d;H=%u;KHS=%.2f;DIFF=%.6f;" p += sprintf(p, "GPU=%d;H=%u;KHS=%.2f;DIFF=%.6f;"
"COUNT=%u;FOUND=%u;TS=%u|", "COUNT=%u;FOUND=%u;ID=%u;TS=%u|",
data[i].gpu_id, data[i].height, data[i].hashrate, data[i].difficulty, data[i].gpu_id, data[i].height, data[i].hashrate, data[i].difficulty,
data[i].hashcount, data[i].hashfound, (uint32_t)ts); data[i].hashcount, data[i].hashfound, data[i].uid, (uint32_t)ts);
} }
return buffer; return buffer;
} }

2
blake32.cu

@ -425,7 +425,7 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
} }
if (!init[thr_id]) { if (!init[thr_id]) {
if (num_processors > 1) if (active_gpus > 1)
cudaSetDevice(device_map[thr_id]); cudaSetDevice(device_map[thr_id]);
CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)), 0); CUDA_CALL_OR_RET_X(cudaMallocHost(&h_resNonce[thr_id], NBN * sizeof(uint32_t)), 0);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), 0); CUDA_CALL_OR_RET_X(cudaMalloc(&d_resNonce[thr_id], NBN * sizeof(uint32_t)), 0);

38
ccminer.cpp

@ -192,7 +192,6 @@ bool want_stratum = true;
bool have_stratum = false; bool have_stratum = false;
static bool submit_old = false; static bool submit_old = false;
bool use_syslog = false; bool use_syslog = false;
static char* opt_syslog_pfx = (char*) PACKAGE_NAME;
bool use_colors = true; bool use_colors = true;
static bool opt_background = false; static bool opt_background = false;
bool opt_quiet = false; bool opt_quiet = false;
@ -208,10 +207,10 @@ static double opt_difficulty = 1; // CH
bool opt_trust_pool = false; bool opt_trust_pool = false;
uint16_t opt_vote = 9999; uint16_t opt_vote = 9999;
int num_cpus; int num_cpus;
int num_processors; int active_gpus;
int device_map[8] = {0,1,2,3,4,5,6,7}; // CB char * device_name[8];
char *device_name[8]; // CB short device_map[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
int device_sm[8]; long device_sm[8] = { 0 };
char *rpc_user = NULL; char *rpc_user = NULL;
static char *rpc_url; static char *rpc_url;
static char *rpc_userpass; static char *rpc_userpass;
@ -241,8 +240,9 @@ int opt_statsavg = 30;
int opt_intensity = 0; int opt_intensity = 0;
uint32_t opt_work_size = 0; /* default */ uint32_t opt_work_size = 0; /* default */
uint32_t opt_work_adds = 0; uint32_t opt_work_adds = 0;
// strdup on char* to allow a common free() if used
char *opt_api_allow = (char*) "127.0.0.1"; /* 0.0.0.0 for all ips */ static char* opt_syslog_pfx = strdup(PROGRAM_NAME);
char *opt_api_allow = strdup("127.0.0.1"); /* 0.0.0.0 for all ips */
int opt_api_listen = 4068; /* 0 to disable */ int opt_api_listen = 4068; /* 0 to disable */
#ifdef HAVE_GETOPT_LONG #ifdef HAVE_GETOPT_LONG
@ -409,6 +409,8 @@ void proper_exit(int reason)
if (hnvml) if (hnvml)
wrap_nvml_destroy(hnvml); wrap_nvml_destroy(hnvml);
#endif #endif
free(opt_syslog_pfx);
free(opt_api_allow);
exit(reason); exit(reason);
} }
@ -1671,6 +1673,7 @@ static void parse_arg(int key, char *arg)
if (p) { if (p) {
/* ip:port */ /* ip:port */
if (p - arg > 0) { if (p - arg > 0) {
free(opt_api_allow);
opt_api_allow = strdup(arg); opt_api_allow = strdup(arg);
opt_api_allow[p - arg] = '\0'; opt_api_allow[p - arg] = '\0';
} }
@ -1872,8 +1875,10 @@ static void parse_arg(int key, char *arg)
case 1008: case 1008:
applog(LOG_INFO, "Now logging to syslog..."); applog(LOG_INFO, "Now logging to syslog...");
use_syslog = true; use_syslog = true;
if (arg && strlen(arg)) if (arg && strlen(arg)) {
free(opt_syslog_pfx);
opt_syslog_pfx = strdup(arg); opt_syslog_pfx = strdup(arg);
}
break; break;
case 'd': // CB case 'd': // CB
{ {
@ -1899,7 +1904,7 @@ static void parse_arg(int key, char *arg)
} }
} }
// set number of active gpus // set number of active gpus
num_processors = opt_n_threads; active_gpus = opt_n_threads;
pch = strtok (NULL, ","); pch = strtok (NULL, ",");
} }
} }
@ -2079,7 +2084,7 @@ int main(int argc, char *argv[])
num_cpus = 1; num_cpus = 1;
// number of gpus // number of gpus
num_processors = cuda_num_devices(); active_gpus = cuda_num_devices();
cuda_devicenames(); cuda_devicenames();
/* parse command line */ /* parse command line */
@ -2133,12 +2138,12 @@ int main(int argc, char *argv[])
SetConsoleCtrlHandler((PHANDLER_ROUTINE)ConsoleHandler, TRUE); SetConsoleCtrlHandler((PHANDLER_ROUTINE)ConsoleHandler, TRUE);
#endif #endif
if (num_processors == 0) { if (active_gpus == 0) {
applog(LOG_ERR, "No CUDA devices found! terminating."); applog(LOG_ERR, "No CUDA devices found! terminating.");
exit(1); exit(1);
} }
if (!opt_n_threads) if (!opt_n_threads)
opt_n_threads = num_processors; opt_n_threads = active_gpus;
#ifdef HAVE_SYSLOG_H #ifdef HAVE_SYSLOG_H
if (use_syslog) if (use_syslog)
@ -2208,7 +2213,7 @@ int main(int argc, char *argv[])
#ifdef USE_WRAPNVML #ifdef USE_WRAPNVML
#ifndef WIN32 #ifndef WIN32
/* nvml is currently not usable on Windows (even for x64) */ /* nvml is currently not the best choice on Windows (only in x64) */
hnvml = wrap_nvml_create(); hnvml = wrap_nvml_create();
if (hnvml) if (hnvml)
applog(LOG_INFO, "NVML GPU monitoring enabled."); applog(LOG_INFO, "NVML GPU monitoring enabled.");
@ -2241,8 +2246,9 @@ int main(int argc, char *argv[])
thr = &thr_info[i]; thr = &thr_info[i];
thr->id = i; thr->id = i;
thr->gpu.gpu_id = device_map[i];
thr->gpu.thr_id = i; thr->gpu.thr_id = i;
thr->gpu.gpu_id = (uint8_t) device_map[i];
thr->gpu.gpu_arch = (uint16_t) device_sm[device_map[i]];
thr->q = tq_new(); thr->q = tq_new();
if (!thr->q) if (!thr->q)
return 1; return 1;
@ -2253,9 +2259,9 @@ int main(int argc, char *argv[])
} }
} }
applog(LOG_INFO, "%d miner threads started, " applog(LOG_INFO, "%d miner thread%s started, "
"using '%s' algorithm.", "using '%s' algorithm.",
opt_n_threads, opt_n_threads, opt_n_threads > 1 ? "s":"",
algo_names[opt_algo]); algo_names[opt_algo]);
#ifdef WIN32 #ifdef WIN32

12
ccminer.vcxproj

@ -172,7 +172,7 @@
<CInterleavedPTX>false</CInterleavedPTX> <CInterleavedPTX>false</CInterleavedPTX>
<MaxRegCount>80</MaxRegCount> <MaxRegCount>80</MaxRegCount>
<PtxAsOptionV>true</PtxAsOptionV> <PtxAsOptionV>true</PtxAsOptionV>
<Keep>true</Keep> <Keep>false</Keep>
<CodeGeneration>compute_30,sm_30;compute_50,sm_50</CodeGeneration> <CodeGeneration>compute_30,sm_30;compute_50,sm_50</CodeGeneration>
<AdditionalOptions>--ptxas-options="-O2" %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions>--ptxas-options="-O2" %(AdditionalOptions)</AdditionalOptions>
<Defines> <Defines>
@ -215,7 +215,7 @@
<CInterleavedPTX>false</CInterleavedPTX> <CInterleavedPTX>false</CInterleavedPTX>
<MaxRegCount>80</MaxRegCount> <MaxRegCount>80</MaxRegCount>
<PtxAsOptionV>true</PtxAsOptionV> <PtxAsOptionV>true</PtxAsOptionV>
<Keep>true</Keep> <Keep>false</Keep>
<CodeGeneration>compute_50,sm_50</CodeGeneration> <CodeGeneration>compute_50,sm_50</CodeGeneration>
<Include> <Include>
</Include> </Include>
@ -366,7 +366,7 @@
<CudaCompile Include="heavy\heavy.cu"> <CudaCompile Include="heavy\heavy.cu">
</CudaCompile> </CudaCompile>
<CudaCompile Include="JHA\cuda_jha_compactionTest.cu"> <CudaCompile Include="JHA\cuda_jha_compactionTest.cu">
<AdditionalOptions Condition="'$(Configuration)'=='Release'">-Xptxas "-abi=yes -O2" %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)'=='Release'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)'=='Debug'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
</CudaCompile> </CudaCompile>
<CudaCompile Include="JHA\cuda_jha_keccak512.cu"> <CudaCompile Include="JHA\cuda_jha_keccak512.cu">
@ -375,7 +375,7 @@
</CudaCompile> </CudaCompile>
<CudaCompile Include="blake32.cu"> <CudaCompile Include="blake32.cu">
<MaxRegCount>64</MaxRegCount> <MaxRegCount>64</MaxRegCount>
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options="-O2 -dlcm=cg" %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options="-dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
<FastMath>true</FastMath> <FastMath>true</FastMath>
</CudaCompile> </CudaCompile>
<CudaCompile Include="keccak\cuda_keccak256.cu"> <CudaCompile Include="keccak\cuda_keccak256.cu">
@ -384,7 +384,7 @@
<CudaCompile Include="keccak\keccak256.cu" /> <CudaCompile Include="keccak\keccak256.cu" />
<CudaCompile Include="pentablake.cu"> <CudaCompile Include="pentablake.cu">
<MaxRegCount>80</MaxRegCount> <MaxRegCount>80</MaxRegCount>
<AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options="-O2 -dlcm=cg" %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)'=='Release'">--ptxas-options="-dlcm=cg" %(AdditionalOptions)</AdditionalOptions>
<FastMath>true</FastMath> <FastMath>true</FastMath>
</CudaCompile> </CudaCompile>
<CudaCompile Include="quark\animecoin.cu"> <CudaCompile Include="quark\animecoin.cu">
@ -402,7 +402,7 @@
<CInterleavedPTX>false</CInterleavedPTX> <CInterleavedPTX>false</CInterleavedPTX>
</CudaCompile> </CudaCompile>
<CudaCompile Include="quark\cuda_quark_compactionTest.cu"> <CudaCompile Include="quark\cuda_quark_compactionTest.cu">
<AdditionalOptions Condition="'$(Configuration)'=='Release'">-Xptxas "-abi=yes -O2" %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)'=='Release'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions Condition="'$(Configuration)'=='Debug'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions> <AdditionalOptions Condition="'$(Configuration)'=='Debug'">-Xptxas "-abi=yes" %(AdditionalOptions)</AdditionalOptions>
</CudaCompile> </CudaCompile>
<CudaCompile Include="quark\cuda_quark_groestl512.cu"> <CudaCompile Include="quark\cuda_quark_groestl512.cu">

6
cuda.cpp

@ -25,10 +25,6 @@
#include "compat.h" // sleep #include "compat.h" // sleep
#endif #endif
extern char *device_name[8];
extern int device_map[8];
extern int device_sm[8];
// CUDA Devices on the System // CUDA Devices on the System
int cuda_num_devices() int cuda_num_devices()
{ {
@ -74,7 +70,7 @@ void cuda_devicenames()
cudaGetDeviceProperties(&props, device_map[i]); cudaGetDeviceProperties(&props, device_map[i]);
device_name[i] = strdup(props.name); device_name[i] = strdup(props.name);
device_sm[i] = props.major * 100 + props.minor * 10; device_sm[i] = (props.major * 100 + props.minor * 10);
} }
} }

4
cuda_helper.h

@ -13,8 +13,8 @@
#include <stdint.h> #include <stdint.h>
extern int device_map[8]; extern "C" short device_map[8];
extern int device_sm[8]; extern "C" long device_sm[8];
// common functions // common functions
extern void cuda_check_cpu_init(int thr_id, int threads); extern void cuda_check_cpu_init(int thr_id, int threads);

3
fuguecoin.cpp

@ -13,9 +13,6 @@ extern "C" void my_fugue256(void *cc, const void *data, size_t len);
extern "C" void my_fugue256_close(void *cc, void *dst); extern "C" void my_fugue256_close(void *cc, void *dst);
extern "C" void my_fugue256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst); extern "C" void my_fugue256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
extern int device_map[8];
extern int device_sm[8];
// vorbereitete Kontexte nach den ersten 80 Bytes // vorbereitete Kontexte nach den ersten 80 Bytes
sph_fugue256_context ctx_fugue_const[8]; sph_fugue256_context ctx_fugue_const[8];

8
miner.h

@ -386,6 +386,7 @@ struct cgpu_info {
uint8_t has_monitoring; uint8_t has_monitoring;
float gpu_temp; float gpu_temp;
int gpu_fan; int gpu_fan;
uint16_t gpu_arch;
int gpu_clock; int gpu_clock;
int gpu_memclock; int gpu_memclock;
size_t gpu_mem; size_t gpu_mem;
@ -410,6 +411,7 @@ struct thr_api {
}; };
struct stats_data { struct stats_data {
uint32_t uid;
uint32_t tm_stat; uint32_t tm_stat;
uint32_t hashcount; uint32_t hashcount;
uint32_t height; uint32_t height;
@ -454,7 +456,7 @@ extern bool opt_protocol;
extern bool opt_tracegpu; extern bool opt_tracegpu;
extern int opt_intensity; extern int opt_intensity;
extern int opt_n_threads; extern int opt_n_threads;
extern int num_processors; extern int active_gpus;
extern int opt_timeout; extern int opt_timeout;
extern bool want_longpoll; extern bool want_longpoll;
extern bool have_longpoll; extern bool have_longpoll;
@ -478,6 +480,10 @@ extern uint32_t opt_work_size;
extern uint64_t global_hashrate; extern uint64_t global_hashrate;
extern double global_diff; extern double global_diff;
extern char* device_name[8];
extern short device_map[8];
extern long device_sm[8];
#define CL_N "\x1B[0m" #define CL_N "\x1B[0m"
#define CL_RED "\x1B[31m" #define CL_RED "\x1B[31m"
#define CL_GRN "\x1B[32m" #define CL_GRN "\x1B[32m"

2
pentablake.cu

@ -511,7 +511,7 @@ extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t *
((uint32_t*)ptarget)[7] = 0x000F; ((uint32_t*)ptarget)[7] = 0x000F;
if (!init[thr_id]) { if (!init[thr_id]) {
if (num_processors > 1) { if (active_gpus > 1) {
cudaSetDevice(device_map[thr_id]); cudaSetDevice(device_map[thr_id]);
} }
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 64 * throughput)); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 64 * throughput));

3
quark/cuda_bmw512.cu

@ -3,9 +3,6 @@
#include "cuda_helper.h" #include "cuda_helper.h"
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
// die Message it Padding zur Berechnung auf der GPU // die Message it Padding zur Berechnung auf der GPU
__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding) __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)

3
quark/cuda_jh512.cu

@ -1,8 +1,5 @@
#include "cuda_helper.h" #include "cuda_helper.h"
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
typedef struct { typedef struct {
uint32_t x[8][4]; /*the 1024-bit state, ( x[i][0] || x[i][1] || x[i][2] || x[i][3] ) is the ith row of the state in the pseudocode*/ uint32_t x[8][4]; /*the 1024-bit state, ( x[i][0] || x[i][1] || x[i][2] || x[i][3] ) is the ith row of the state in the pseudocode*/
uint32_t buffer[16]; /*the 512-bit message block to be hashed;*/ uint32_t buffer[16]; /*the 512-bit message block to be hashed;*/

13
quark/cuda_quark_compactionTest.cu

@ -8,10 +8,7 @@ static uint32_t *d_tempBranch1Nonces[8];
static uint32_t *d_numValid[8]; static uint32_t *d_numValid[8];
static uint32_t *h_numValid[8]; static uint32_t *h_numValid[8];
static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
// True/False tester // True/False tester
typedef uint32_t(*cuda_compactTestFunction_t)(uint32_t *inpHash); typedef uint32_t(*cuda_compactTestFunction_t)(uint32_t *inpHash);
@ -83,7 +80,7 @@ __global__ void quark_compactTest_gpu_SCAN(uint32_t *data, int width, uint32_t *
inpHash = &inpHashes[id<<4]; inpHash = &inpHashes[id<<4];
}else }else
{ {
// Nonce-Liste verfügbar // Nonce-Liste verfügbar
int nonce = d_validNonceTable[id] - startNounce; int nonce = d_validNonceTable[id] - startNounce;
inpHash = &inpHashes[nonce<<4]; inpHash = &inpHashes[nonce<<4];
} }
@ -200,7 +197,7 @@ __global__ void quark_compactTest_gpu_SCATTER(uint32_t *sum, uint32_t *outp, cud
inpHash = &inpHashes[id<<4]; inpHash = &inpHashes[id<<4];
}else }else
{ {
// Nonce-Liste verfügbar // Nonce-Liste verfügbar
int nonce = d_validNonceTable[id] - startNounce; int nonce = d_validNonceTable[id] - startNounce;
actNounce = nonce; actNounce = nonce;
inpHash = &inpHashes[nonce<<4]; inpHash = &inpHashes[nonce<<4];
@ -335,7 +332,7 @@ __host__ void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t st
int order) int order)
{ {
// Wenn validNonceTable genutzt wird, dann werden auch nur die Nonces betrachtet, die dort enthalten sind // Wenn validNonceTable genutzt wird, dann werden auch nur die Nonces betrachtet, die dort enthalten sind
// "threads" ist in diesem Fall auf die Länge dieses Array's zu setzen! // "threads" ist in diesem Fall auf die Länge dieses Array's zu setzen!
quark_compactTest_cpu_dualCompaction(thr_id, threads, quark_compactTest_cpu_dualCompaction(thr_id, threads,
h_numValid[thr_id], d_nonces1, d_nonces2, h_numValid[thr_id], d_nonces1, d_nonces2,
@ -351,7 +348,7 @@ __host__ void quark_compactTest_single_false_cpu_hash_64(int thr_id, int threads
int order) int order)
{ {
// Wenn validNonceTable genutzt wird, dann werden auch nur die Nonces betrachtet, die dort enthalten sind // Wenn validNonceTable genutzt wird, dann werden auch nur die Nonces betrachtet, die dort enthalten sind
// "threads" ist in diesem Fall auf die Länge dieses Array's zu setzen! // "threads" ist in diesem Fall auf die Länge dieses Array's zu setzen!
quark_compactTest_cpu_singleCompaction(thr_id, threads, h_numValid[thr_id], d_nonces1, h_QuarkFalseFunction[thr_id], startNounce, inpHashes, d_validNonceTable); quark_compactTest_cpu_singleCompaction(thr_id, threads, h_numValid[thr_id], d_nonces1, h_QuarkFalseFunction[thr_id], startNounce, inpHashes, d_validNonceTable);

3
quark/cuda_quark_keccak512.cu

@ -3,9 +3,6 @@
#include "cuda_helper.h" #include "cuda_helper.h"
// heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
#define U32TO64_LE(p) \ #define U32TO64_LE(p) \
(((uint64_t)(*p)) | (((uint64_t)(*(p + 1))) << 32)) (((uint64_t)(*p)) | (((uint64_t)(*(p + 1))) << 32))

4
stats.cpp

@ -19,10 +19,9 @@ static uint64_t uid = 0;
extern uint64_t global_hashrate; extern uint64_t global_hashrate;
extern int opt_statsavg; extern int opt_statsavg;
extern int device_map[8];
/** /**
* Store speed per thread (todo: compute vardiff ?) * Store speed per thread
*/ */
void stats_remember_speed(int thr_id, uint32_t hashcount, double hashrate, uint8_t found, uint32_t height) void stats_remember_speed(int thr_id, uint32_t hashcount, double hashrate, uint8_t found, uint32_t height)
{ {
@ -38,6 +37,7 @@ void stats_remember_speed(int thr_id, uint32_t hashcount, double hashrate, uint8
return; return;
memset(&data, 0, sizeof(data)); memset(&data, 0, sizeof(data));
data.uid = uid;
data.gpu_id = gpu; data.gpu_id = gpu;
data.thr_id = (uint8_t)thr_id; data.thr_id = (uint8_t)thr_id;
data.tm_stat = (uint32_t) time(NULL); data.tm_stat = (uint32_t) time(NULL);

3
x11/cuda_x11_cubehash512.cu

@ -1,8 +1,5 @@
#include "cuda_helper.h" #include "cuda_helper.h"
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
typedef unsigned char BitSequence; typedef unsigned char BitSequence;
#define CUBEHASH_ROUNDS 16 /* this is r for CubeHashr/b */ #define CUBEHASH_ROUNDS 16 /* this is r for CubeHashr/b */

3
x13/cuda_x13_fugue512.cu

@ -7,9 +7,6 @@
*/ */
#include "cuda_helper.h" #include "cuda_helper.h"
// aus heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
/* /*
* X13 kernel implementation. * X13 kernel implementation.
* *

4
x15/cuda_x14_shabal512.cu

@ -1,10 +1,8 @@
/* /*
* Shabal-512 for X14/X15 (STUB) * Shabal-512 for X14/X15
*/ */
#include "cuda_helper.h" #include "cuda_helper.h"
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
/* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */ /* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */
/* /*
* Shabal implementation. * Shabal implementation.

2
x15/cuda_x15_whirlpool.cu

@ -11,8 +11,6 @@
#include "cuda_helper.h" #include "cuda_helper.h"
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding) __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
__constant__ uint32_t pTarget[8]; __constant__ uint32_t pTarget[8];

3
x17/cuda_x17_haval512.cu

@ -48,9 +48,6 @@
#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) #define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
// in heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
static __constant__ uint32_t initVector[8]; static __constant__ uint32_t initVector[8];
static const uint32_t c_initVector[8] = { static const uint32_t c_initVector[8] = {

3
x17/cuda_x17_sha512.cu

@ -47,9 +47,6 @@
#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) #define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) #define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
// in heavy.cu
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
static __constant__ uint64_t H_512[8]; static __constant__ uint64_t H_512[8];
static const uint64_t H512[8] = { static const uint64_t H512[8] = {

Loading…
Cancel
Save