Browse Source

Allow different intensity per device

and clean up the old variables, which are no longer required
master
Tanguy Pruvot 10 years ago
parent
commit
26b51a557b
  1. 3
      Algo256/blake256.cu
  2. 3
      Algo256/keccak256.cu
  3. 7
      JHA/jackpotcoin.cu
  4. 16
      api.cpp
  5. 32
      ccminer.cpp
  6. 7
      cuda.cpp
  7. 7
      cuda_nist5.cu
  8. 3
      fuguecoin.cpp
  9. 3
      groestlcoin.cpp
  10. 3
      heavy/heavy.cu
  11. 3
      lyra2/lyra2RE.cu
  12. 6
      miner.h
  13. 7
      myriadgroestl.cpp
  14. 3
      pentablake.cu
  15. 3
      quark/animecoin.cu
  16. 3
      quark/quarkcoin.cu
  17. 5
      qubit/deep.cu
  18. 3
      qubit/doom.cu
  19. 3
      qubit/qubit.cu
  20. 3
      x11/fresh.cu
  21. 3
      x11/s3.cu
  22. 3
      x11/x11.cu
  23. 3
      x13/x13.cu
  24. 3
      x15/whirlpool.cu
  25. 4
      x15/x14.cu
  26. 3
      x15/x15.cu
  27. 4
      x17/x17.cu

3
Algo256/blake256.cu

@ -391,8 +391,7 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt @@ -391,8 +391,7 @@ extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *pt
uint32_t crcsum;
#endif
int intensity = (device_sm[device_map[thr_id]] > 500) ? 22 : 20;
uint32_t throughput = opt_work_size ? opt_work_size : (1 << intensity);
apiReportThroughput(thr_id, (uint32_t) throughput);
uint32_t throughput = device_intensity(thr_id, __func__, 1U << intensity);
throughput = min(throughput, max_nonce - first_nonce);
int rc = 0;

3
Algo256/keccak256.cu

@ -41,8 +41,7 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata, @@ -41,8 +41,7 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
uint32_t throughput = opt_work_size ? opt_work_size : (1 << 21); // 256*256*8*4
apiReportThroughput(thr_id, throughput);
uint32_t throughput = device_intensity(thr_id, __func__, 1U << 21); // 256*256*8*4
throughput = min(throughput, (max_nonce - first_nonce));
if (opt_benchmark)

7
JHA/jackpotcoin.cu

@ -93,13 +93,12 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata, @@ -93,13 +93,12 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
{
const uint32_t first_nonce = pdata[19];
int throughput = (int) device_intensity(thr_id, __func__, 1U << 20);
throughput = min(throughput, (int)(max_nonce - first_nonce));
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x000f;
int throughput = opt_work_size ? opt_work_size : (1 << 20); // 256*4096
apiReportThroughput(thr_id, (uint32_t) throughput);
throughput = min(throughput, (int)(max_nonce - first_nonce));
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);

16
api.cpp

@ -94,7 +94,6 @@ extern char *opt_api_allow; @@ -94,7 +94,6 @@ extern char *opt_api_allow;
extern int opt_api_listen; /* port */
extern uint32_t accepted_count;
extern uint32_t rejected_count;
extern uint32_t opt_work_adds;
extern int num_cpus;
extern struct stratum_ctx stratum;
extern char* rpc_user;
@ -877,24 +876,19 @@ void *api_thread(void *userdata) @@ -877,24 +876,19 @@ void *api_thread(void *userdata)
}
/* to be able to report the default value set in each algo */
void apiReportThroughput(int thr_id, uint32_t throughput)
void api_set_throughput(int thr_id, uint32_t throughput)
{
struct cgpu_info *cgpu = &thr_info[thr_id].gpu;
if (cgpu) {
cgpu->throughput = throughput;
if (opt_intensity == 0) {
uint8_t i = 0;
uint32_t ws = throughput;
uint8_t i = 0;
cgpu->throughput = throughput;
while (ws > 1 && i++ < 32)
ws = ws >> 1;
cgpu->intensity_int = i;
cgpu->intensity = (float) i;
} else {
cgpu->intensity_int = (uint8_t) opt_intensity;
cgpu->intensity = (float) opt_intensity;
if (opt_work_adds) {
cgpu->intensity += ((float) opt_work_adds / (1U << opt_intensity));
}
if (i && (1U << i) < throughput) {
cgpu->intensity += ((float) (throughput-(1U << i)) / (1U << i));
}
}
}

32
ccminer.cpp

@ -171,6 +171,7 @@ int active_gpus; @@ -171,6 +171,7 @@ int active_gpus;
char * device_name[MAX_GPUS];
short device_map[MAX_GPUS] = { 0 };
long device_sm[MAX_GPUS] = { 0 };
uint32_t gpus_intensity[MAX_GPUS] = { 0 };
char *rpc_user = NULL;
static char *rpc_url;
static char *rpc_userpass;
@ -197,9 +198,6 @@ static double *thr_hashrates; @@ -197,9 +198,6 @@ static double *thr_hashrates;
uint64_t global_hashrate = 0;
double global_diff = 0.0;
int opt_statsavg = 30;
int opt_intensity = 0;
uint32_t opt_work_size = 0; /* default */
uint32_t opt_work_adds = 0;
// strdup on char* to allow a common free() if used
static char* opt_syslog_pfx = strdup(PROGRAM_NAME);
char *opt_api_allow = strdup("127.0.0.1"); /* 0.0.0.0 for all ips */
@ -1831,16 +1829,32 @@ static void parse_arg(int key, char *arg) @@ -1831,16 +1829,32 @@ static void parse_arg(int key, char *arg)
v = (uint32_t) d;
if (v < 0 || v > 31)
show_usage_and_exit(1);
opt_intensity = v;
{
int n = 0, adds = 0;
int ngpus = cuda_num_devices();
char * pch = strtok(arg,",");
if (pch == NULL) {
for (n=0; n < ngpus; n++)
gpus_intensity[n] = (1 << v);
break;
}
while (pch != NULL) {
d = atof(pch);
v = (uint32_t) d;
if (v > 7) { /* 0 = default */
opt_work_size = (1 << v);
gpus_intensity[n] = (1 << v);
if ((d - v) > 0.0) {
opt_work_adds = (uint32_t) floor((d - v) * (1 << (v-8))) * 256;
opt_work_size += opt_work_adds;
adds = (uint32_t) floor((d - v) * (1 << (v-8))) * 256;
gpus_intensity[n] += adds;
applog(LOG_INFO, "Adding %u threads to intensity %u, %u cuda threads",
opt_work_adds, v, opt_work_size);
adds, v, gpus_intensity[n]);
} else {
applog(LOG_INFO, "Intensity set to %u, %u cuda threads", v, opt_work_size);
applog(LOG_INFO, "Intensity set to %u, %u cuda threads",
v, gpus_intensity[n]);
}
}
n++;
pch = strtok(NULL, ",");
}
}
break;

7
cuda.cpp

@ -115,6 +115,13 @@ int cuda_finddevice(char *name) @@ -115,6 +115,13 @@ int cuda_finddevice(char *name)
return -1;
}
/**
 * Pick the number of cuda threads (work size) to use for a mining thread.
 *
 * @param thr_id   index of the mining thread, used to index gpus_intensity[]
 * @param func     name of the calling function (callers pass __func__);
 *                 not used by this implementation
 * @param defcount work size to use when no per-device value is set
 * @return gpus_intensity[thr_id] when it is non-zero, defcount otherwise
 *
 * The selected value is also handed to api_set_throughput() before returning.
 */
uint32_t device_intensity(int thr_id, const char *func, uint32_t defcount)
{
	// a zero entry means "no intensity configured for this thread"
	uint32_t count = gpus_intensity[thr_id];
	if (count == 0)
		count = defcount;

	api_set_throughput(thr_id, count);
	return count;
}
// Time synchronization routine from cudaminer, using CPU sleep
typedef struct { double value[8]; } tsumarray;
cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id)

7
cuda_nist5.cu

@ -72,13 +72,12 @@ extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata, @@ -72,13 +72,12 @@ extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata,
{
const uint32_t first_nonce = pdata[19];
int throughput = (int) device_intensity(thr_id, __func__, 1 << 20); // 256*256*16
throughput = min(throughput, (int) (max_nonce - first_nonce));
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x00FF;
int throughput = opt_work_size ? opt_work_size : (1 << 20); // 256*4096
apiReportThroughput(thr_id, (uint32_t) throughput);
throughput = min(throughput, (int) (max_nonce - first_nonce));
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);

3
fuguecoin.cpp

@ -27,8 +27,7 @@ extern "C" int scanhash_fugue256(int thr_id, uint32_t *pdata, const uint32_t *pt @@ -27,8 +27,7 @@ extern "C" int scanhash_fugue256(int thr_id, uint32_t *pdata, const uint32_t *pt
{
uint32_t start_nonce = pdata[19]++;
int intensity = (device_sm[device_map[thr_id]] > 500) ? 22 : 19;
uint32_t throughput = opt_work_size ? opt_work_size : (1 << intensity);
apiReportThroughput(thr_id, throughput);
int throughput = (int) device_intensity(thr_id, __func__, 1 << intensity); // 256*256*8
throughput = min(throughput, max_nonce - start_nonce);
if (opt_benchmark)

3
groestlcoin.cpp

@ -64,8 +64,7 @@ extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t @@ -64,8 +64,7 @@ extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t
uint32_t max_nonce, unsigned long *hashes_done)
{
uint32_t start_nonce = pdata[19]++;
uint32_t throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*2048
apiReportThroughput(thr_id, throughput);
uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8
throughput = min(throughput, max_nonce - start_nonce);
uint32_t *outputHash = (uint32_t*)malloc(throughput * 16 * sizeof(uint32_t));

3
heavy/heavy.cu

@ -136,8 +136,7 @@ int scanhash_heavy(int thr_id, uint32_t *pdata, @@ -136,8 +136,7 @@ int scanhash_heavy(int thr_id, uint32_t *pdata,
{
const uint32_t first_nonce = pdata[19];
// CUDA will process thousands of threads.
int throughput = opt_work_size ? opt_work_size : (1 << 19) - 256; // 256*2048
apiReportThroughput(thr_id, (uint32_t) throughput);
int throughput = (int) device_intensity(thr_id, __func__, (1U << 19) - 256);
throughput = min(throughput, (int)(max_nonce - first_nonce));
int rc = 0;

3
lyra2/lyra2RE.cu

@ -63,8 +63,7 @@ extern "C" int scanhash_lyra2(int thr_id, uint32_t *pdata, @@ -63,8 +63,7 @@ extern "C" int scanhash_lyra2(int thr_id, uint32_t *pdata,
{
const uint32_t first_nonce = pdata[19];
int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 18 : 17;
int throughput = opt_work_size ? opt_work_size : (1 << intensity); // 18=256*256*4;
apiReportThroughput(thr_id, (uint32_t) throughput);
int throughput = (int) device_intensity(thr_id, __func__, 1U << intensity); // 18=256*256*4;
throughput = min(throughput, (int)(max_nonce - first_nonce));
if (opt_benchmark)

6
miner.h

@ -378,7 +378,7 @@ extern int scanhash_x17(int thr_id, uint32_t *pdata, @@ -378,7 +378,7 @@ extern int scanhash_x17(int thr_id, uint32_t *pdata,
/* api related */
void *api_thread(void *userdata);
void apiReportThroughput(int thr_id, uint32_t throughput);
void api_set_throughput(int thr_id, uint32_t throughput);
struct cgpu_info {
uint8_t gpu_id;
@ -462,7 +462,6 @@ extern bool opt_debug; @@ -462,7 +462,6 @@ extern bool opt_debug;
extern bool opt_quiet;
extern bool opt_protocol;
extern bool opt_tracegpu;
extern int opt_intensity;
extern int opt_n_threads;
extern int active_gpus;
extern int opt_timeout;
@ -483,7 +482,6 @@ extern int api_thr_id; @@ -483,7 +482,6 @@ extern int api_thr_id;
extern struct work_restart *work_restart;
extern bool opt_trust_pool;
extern uint16_t opt_vote;
extern uint32_t opt_work_size;
extern uint64_t global_hashrate;
extern double global_diff;
@ -492,6 +490,7 @@ extern double global_diff; @@ -492,6 +490,7 @@ extern double global_diff;
extern char* device_name[MAX_GPUS];
extern short device_map[MAX_GPUS];
extern long device_sm[MAX_GPUS];
extern uint32_t gpus_intensity[MAX_GPUS];
#define CL_N "\x1B[0m"
#define CL_RED "\x1B[31m"
@ -535,6 +534,7 @@ extern int timeval_subtract(struct timeval *result, struct timeval *x, @@ -535,6 +534,7 @@ extern int timeval_subtract(struct timeval *result, struct timeval *x,
extern bool fulltest(const uint32_t *hash, const uint32_t *target);
extern void diff_to_target(uint32_t *target, double diff);
extern void get_currentalgo(char* buf, int sz);
extern uint32_t device_intensity(int thr_id, const char *func, uint32_t defcount);
struct stratum_job {
char *job_id;

7
myriadgroestl.cpp

@ -37,13 +37,8 @@ static bool init[MAX_GPUS] = { 0 }; @@ -37,13 +37,8 @@ static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done)
{
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x000000ff;
uint32_t start_nonce = pdata[19]++;
uint32_t throughput = opt_work_size ? opt_work_size : (1 << 17);
apiReportThroughput(thr_id, throughput);
uint32_t throughput = device_intensity(thr_id, __func__, 1 << 17);
throughput = min(throughput, max_nonce - start_nonce);
uint32_t *outputHash = (uint32_t*)malloc(throughput * 16 * sizeof(uint32_t));

3
pentablake.cu

@ -370,8 +370,7 @@ extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t * @@ -370,8 +370,7 @@ extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t *
const uint32_t first_nonce = pdata[19];
uint32_t endiandata[20];
int rc = 0;
int throughput = opt_work_size ? opt_work_size : (128 * 2560); // 18.5
apiReportThroughput(thr_id, (uint32_t) throughput);
int throughput = (int) device_intensity(thr_id, __func__, 128U * 2560); // 18.5
throughput = min(throughput, (int)(max_nonce - first_nonce));
if (opt_benchmark)

3
quark/animecoin.cu

@ -167,8 +167,7 @@ extern "C" int scanhash_anime(int thr_id, uint32_t *pdata, @@ -167,8 +167,7 @@ extern "C" int scanhash_anime(int thr_id, uint32_t *pdata,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*2048
apiReportThroughput(thr_id, (uint32_t) throughput);
int throughput = (int) device_intensity(thr_id, __func__, 1 << 19); // 256*256*8
throughput = min(throughput, (int)(max_nonce - first_nonce));
if (opt_benchmark)

3
quark/quarkcoin.cu

@ -138,8 +138,7 @@ extern "C" int scanhash_quark(int thr_id, uint32_t *pdata, @@ -138,8 +138,7 @@ extern "C" int scanhash_quark(int thr_id, uint32_t *pdata,
{
const uint32_t first_nonce = pdata[19];
int throughput = opt_work_size ? opt_work_size : (1 << 20); // 256*4096
apiReportThroughput(thr_id, (uint32_t) throughput);
int throughput = (int) device_intensity(thr_id, __func__, 1 << 20); // 256*4096
throughput = min(throughput, (int)(max_nonce - first_nonce));
if (opt_benchmark)

5
qubit/deep.cu

@ -60,9 +60,8 @@ extern "C" int scanhash_deep(int thr_id, uint32_t *pdata, @@ -60,9 +60,8 @@ extern "C" int scanhash_deep(int thr_id, uint32_t *pdata,
{
const uint32_t first_nonce = pdata[19];
uint32_t endiandata[20];
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8
apiReportThroughput(thr_id, (uint32_t) throughput);
throughput = min(throughput, (int)(max_nonce - first_nonce));
int throughput = (int) device_intensity(thr_id, __func__, 1U << 19); // 256*256*8
throughput = min(throughput, (int) (max_nonce - first_nonce));
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x0000f;

3
qubit/doom.cu

@ -40,8 +40,7 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata, @@ -40,8 +40,7 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
{
const uint32_t first_nonce = pdata[19];
uint32_t endiandata[20];
uint32_t throughput = opt_work_size ? opt_work_size : (1 << 22); // 256*256*8*8
apiReportThroughput(thr_id, (uint32_t) throughput);
uint32_t throughput = device_intensity(thr_id, __func__, 1U << 22); // 256*256*8*8
throughput = min(throughput, (max_nonce - first_nonce));
if (opt_benchmark)

3
qubit/qubit.cu

@ -80,8 +80,7 @@ extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata, @@ -80,8 +80,7 @@ extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata,
{
uint32_t endiandata[20];
const uint32_t first_nonce = pdata[19];
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8
apiReportThroughput(thr_id, (uint32_t) throughput);
int throughput = (int) device_intensity(thr_id, __func__, 1U << 19); // 256*256*8
throughput = min(throughput, (int)(max_nonce - first_nonce));
if (opt_benchmark)

3
x11/fresh.cu

@ -77,8 +77,7 @@ extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata, @@ -77,8 +77,7 @@ extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata,
const uint32_t first_nonce = pdata[19];
uint32_t endiandata[20];
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8;
apiReportThroughput(thr_id, (uint32_t) throughput);
int throughput = (int) device_intensity(thr_id, __func__, 1 << 19);
throughput = min(throughput, (int) (max_nonce - first_nonce));
if (opt_benchmark)

3
x11/s3.cu

@ -62,8 +62,7 @@ extern "C" int scanhash_s3(int thr_id, uint32_t *pdata, @@ -62,8 +62,7 @@ extern "C" int scanhash_s3(int thr_id, uint32_t *pdata,
// reduce by one the intensity on windows
intensity--;
#endif
int throughput = opt_work_size ? opt_work_size : (1 << intensity);
apiReportThroughput(thr_id, (uint32_t) throughput);
int throughput = (int) device_intensity(thr_id, __func__, 1 << intensity);
throughput = min(throughput, (int)(max_nonce - first_nonce));
if (opt_benchmark)

3
x11/x11.cu

@ -133,8 +133,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, @@ -133,8 +133,7 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
{
const uint32_t first_nonce = pdata[19];
int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19;
int throughput = opt_work_size ? opt_work_size : (1 << intensity); // 20=256*256*16;
apiReportThroughput(thr_id, (uint32_t) throughput);
int throughput = (int) device_intensity(thr_id, __func__, 1 << intensity); // 19=256*256*8;
throughput = min(throughput, (int)(max_nonce - first_nonce));
if (opt_benchmark)

3
x13/x13.cu

@ -152,8 +152,7 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata, @@ -152,8 +152,7 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
{
const uint32_t first_nonce = pdata[19];
int intensity = 19; // (device_sm[device_map[thr_id]] > 500 && !is_windows()) ? 20 : 19;
int throughput = opt_work_size ? opt_work_size : (1 << intensity); // 19=256*256*8;
apiReportThroughput(thr_id, (uint32_t) throughput);
int throughput = (int) device_intensity(thr_id, __func__, 1 << intensity); // 19=256*256*8;
throughput = min(throughput, (int)(max_nonce - first_nonce));
if (opt_benchmark)

3
x15/whirlpool.cu

@ -57,8 +57,7 @@ extern "C" int scanhash_whc(int thr_id, uint32_t *pdata, @@ -57,8 +57,7 @@ extern "C" int scanhash_whc(int thr_id, uint32_t *pdata,
{
const uint32_t first_nonce = pdata[19];
uint32_t endiandata[20];
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8;
apiReportThroughput(thr_id, (uint32_t) throughput);
int throughput = (int) device_intensity(thr_id, __func__, 1U << 19); // 19=256*256*8;
throughput = min(throughput, (int)(max_nonce - first_nonce));
if (opt_benchmark)

4
x15/x14.cu

@ -163,8 +163,8 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata, @@ -163,8 +163,8 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
{
const uint32_t first_nonce = pdata[19];
uint32_t endiandata[20];
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8;
apiReportThroughput(thr_id, (uint32_t) throughput);
int throughput = (int) device_intensity(thr_id, __func__, 1U << 19); // 19=256*256*8;
throughput = min(throughput, (int)(max_nonce - first_nonce));
if (opt_benchmark)

3
x15/x15.cu

@ -174,8 +174,7 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, @@ -174,8 +174,7 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
const uint32_t first_nonce = pdata[19];
uint32_t endiandata[20];
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8;
apiReportThroughput(thr_id, (uint32_t) throughput);
int throughput = (int) device_intensity(thr_id, __func__, 1U << 19); // 19=256*256*8;
throughput = min(throughput, (int)(max_nonce - first_nonce));
if (opt_benchmark)

4
x17/x17.cu

@ -191,8 +191,8 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata, @@ -191,8 +191,8 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata,
unsigned long *hashes_done)
{
const uint32_t first_nonce = pdata[19];
int throughput = opt_work_size ? opt_work_size : (1 << 19); // 256*256*8;
apiReportThroughput(thr_id, (uint32_t) throughput);
int throughput = (int) device_intensity(thr_id, __func__, 1U << 19); // 19=256*256*8;
throughput = min(throughput, (int)(max_nonce - first_nonce));
if (opt_benchmark)

Loading…
Cancel
Save