Browse Source

Flag devices that hit the thermal cutoff as recovering instead of disabling them, and re-enable them once they fall back below the target

temperature, provided --no-restart has not been set.
nfactor-troky
Con Kolivas 13 years ago
parent
commit
02295c69c8
  1. 11
      adl.c
  2. 2
      adl.h
  3. 10
      api.c
  4. 4
      bitforce.c
  5. 36
      cgminer.c
  6. 2
      device-cpu.c
  7. 22
      device-gpu.c
  8. 9
      miner.h

11
adl.c

@ -330,7 +330,7 @@ void init_adl(int nDevs) @@ -330,7 +330,7 @@ void init_adl(int nDevs)
continue;
}
if (!gpus[gpu].enabled) {
if (gpus[gpu].deven == DEV_DISABLED) {
gpus[i].gpu_engine =
gpus[i].gpu_memclock =
gpus[i].gpu_vddc =
@ -1025,7 +1025,7 @@ static void fan_autotune(int gpu, int temp, int fanpercent, bool __maybe_unused @@ -1025,7 +1025,7 @@ static void fan_autotune(int gpu, int temp, int fanpercent, bool __maybe_unused
}
}
void gpu_autotune(int gpu, bool *enable)
void gpu_autotune(int gpu, enum dev_enable *denable)
{
int temp, fanpercent, engine, newengine, twintemp = 0;
bool fan_optimal = true;
@ -1068,7 +1068,7 @@ void gpu_autotune(int gpu, bool *enable) @@ -1068,7 +1068,7 @@ void gpu_autotune(int gpu, bool *enable)
if (engine && ga->autoengine) {
if (temp > cgpu->cutofftemp) {
applog(LOG_WARNING, "Hit thermal cutoff limit on GPU %d, disabling!", gpu);
*enable = false;
*denable = DEV_RECOVER;
newengine = ga->minspeed;
} else if (temp > ga->overtemp && engine > ga->minspeed) {
applog(LOG_WARNING, "Overheat detected, decreasing GPU %d clock speed", gpu);
@ -1077,9 +1077,12 @@ void gpu_autotune(int gpu, bool *enable) @@ -1077,9 +1077,12 @@ void gpu_autotune(int gpu, bool *enable)
applog(LOG_DEBUG, "Temperature %d degrees over target, decreasing clock speed", opt_hysteresis);
newengine = engine - ga->lpOdParameters.sEngineClock.iStep;
/* Only try to tune engine speed up if this GPU is not disabled */
} else if (temp < ga->targettemp && engine < ga->maxspeed && *enable) {
} else if (temp < ga->targettemp && engine < ga->maxspeed && *denable == DEV_ENABLED) {
applog(LOG_DEBUG, "Temperature below target, increasing clock speed");
newengine = engine + ga->lpOdParameters.sEngineClock.iStep;
} else if (temp < ga->targettemp && *denable == DEV_RECOVER && opt_restart) {
applog(LOG_NOTICE, "Device recovered to temperature below target, re-enabling");
*denable = DEV_ENABLED;
}
if (newengine > ga->maxspeed)

2
adl.h

@ -17,7 +17,7 @@ int gpu_fanpercent(int gpu); @@ -17,7 +17,7 @@ int gpu_fanpercent(int gpu);
bool gpu_stats(int gpu, float *temp, int *engineclock, int *memclock, float *vddc,
int *activity, int *fanspeed, int *fanpercent, int *powertune);
void change_gpusettings(int gpu);
void gpu_autotune(int gpu, bool *enable);
void gpu_autotune(int gpu, enum dev_enable *denable);
void clear_adl(int nDevs);
#else /* HAVE_ADL */
#define adl_active (0)

10
api.c

@ -522,7 +522,7 @@ static void gpustatus(int gpu, bool isjson) @@ -522,7 +522,7 @@ static void gpustatus(int gpu, bool isjson)
#endif
gt = gv = gm = gc = ga = gf = gp = pt = 0;
if (cgpu->enabled)
if (cgpu->deven != DEV_DISABLED)
enabled = (char *)YES;
else
enabled = (char *)NO;
@ -830,7 +830,7 @@ static void gpuenable(__maybe_unused SOCKETTYPE c, char *param, bool isjson) @@ -830,7 +830,7 @@ static void gpuenable(__maybe_unused SOCKETTYPE c, char *param, bool isjson)
return;
}
if (gpus[id].enabled) {
if (gpus[id].deven != DEV_DISABLED) {
strcpy(io_buffer, message(MSG_ALRENA, id, NULL, isjson));
return;
}
@ -844,7 +844,7 @@ static void gpuenable(__maybe_unused SOCKETTYPE c, char *param, bool isjson) @@ -844,7 +844,7 @@ static void gpuenable(__maybe_unused SOCKETTYPE c, char *param, bool isjson)
return;
}
gpus[id].enabled = true;
gpus[id].deven = DEV_ENABLED;
tq_push(thr->q, &ping);
}
@ -873,12 +873,12 @@ static void gpudisable(__maybe_unused SOCKETTYPE c, char *param, bool isjson) @@ -873,12 +873,12 @@ static void gpudisable(__maybe_unused SOCKETTYPE c, char *param, bool isjson)
return;
}
if (!gpus[id].enabled) {
if (gpus[id].deven == DEV_DISABLED) {
strcpy(io_buffer, message(MSG_ALRDIS, id, NULL, isjson));
return;
}
gpus[id].enabled = false;
gpus[id].deven = DEV_DISABLED;
strcpy(io_buffer, message(MSG_GPUDIS, id, NULL, isjson));
}

4
bitforce.c

@ -119,7 +119,7 @@ static bool bitforce_detect_one(const char *devpath) @@ -119,7 +119,7 @@ static bool bitforce_detect_one(const char *devpath)
bitforce->api = &bitforce_api;
bitforce->device_id = i++;
bitforce->device_path = strdup(devpath);
bitforce->enabled = true;
bitforce->deven = DEV_ENABLED;
bitforce->threads = 1;
return true;
@ -254,7 +254,7 @@ static uint64_t bitforce_scanhash(struct thr_info *thr, struct work *work, uint6 @@ -254,7 +254,7 @@ static uint64_t bitforce_scanhash(struct thr_info *thr, struct work *work, uint6
bitforce->temp = temp;
if (temp > bitforce->cutofftemp) {
applog(LOG_WARNING, "Hit thermal cutoff limit on %s %d, disabling!", bitforce->api->name, bitforce->device_id);
bitforce->enabled = false;
bitforce->deven = DEV_RECOVER;
}
}
}

36
cgminer.c

@ -100,7 +100,7 @@ static const bool opt_time = true; @@ -100,7 +100,7 @@ static const bool opt_time = true;
#ifdef HAVE_OPENCL
int opt_dynamic_interval = 7;
static bool opt_restart = true;
bool opt_restart = true;
static bool opt_nogpu;
#endif
@ -1223,10 +1223,12 @@ static void curses_print_devstatus(int thr_id) @@ -1223,10 +1223,12 @@ static void curses_print_devstatus(int thr_id)
wprintw(statuswin, "DEAD ");
else if (cgpu->status == LIFE_SICK)
wprintw(statuswin, "SICK ");
else if (!cgpu->enabled)
wprintw(statuswin, "OFF ");
else
wprintw(statuswin, "%5.1f", cgpu->rolling);
else if (cgpu->deven == DEV_DISABLED)
wprintw(statuswin, "OFF ");
else if (cgpu->deven == DEV_RECOVER)
wprintw(statuswin, "REST ");
else
wprintw(statuswin, "%5.1f", cgpu->rolling);
adj_width(cgpu->accepted, &awidth);
adj_width(cgpu->rejected, &rwidth);
adj_width(cgpu->hw_errors, &hwwidth);
@ -2380,11 +2382,11 @@ void write_config(FILE *fcfg) @@ -2380,11 +2382,11 @@ void write_config(FILE *fcfg)
if (opt_socks_proxy && *opt_socks_proxy)
fprintf(fcfg, ",\n\"socks-proxy\" : \"%s\"", opt_socks_proxy);
for(i = 0; i < nDevs; i++)
if (!gpus[i].enabled)
if (gpus[i].deven == DEV_DISABLED)
break;
if (i < nDevs)
for (i = 0; i < nDevs; i++)
if (gpus[i].enabled)
if (gpus[i].deven != DEV_DISABLED)
fprintf(fcfg, ",\n\"device\" : \"%d\"", i);
if (opt_api_allow != NULL)
fprintf(fcfg, ",\n\"api-allow\" : \"%s\"", opt_api_allow);
@ -3401,7 +3403,7 @@ void *miner_thread(void *userdata) @@ -3401,7 +3403,7 @@ void *miner_thread(void *userdata)
tv_lastupdate = tv_end;
}
if (unlikely(mythr->pause || !cgpu->enabled)) {
if (unlikely(mythr->pause || cgpu->deven == DEV_DISABLED)) {
applog(LOG_WARNING, "Thread %d being disabled", thr_id);
mythr->rolling = mythr->cgpu->rolling = 0;
applog(LOG_DEBUG, "Popping wakeup ping in miner thread");
@ -3728,7 +3730,7 @@ static void *watchdog_thread(void __maybe_unused *userdata) @@ -3728,7 +3730,7 @@ static void *watchdog_thread(void __maybe_unused *userdata)
thr = &thr_info[i];
/* Don't touch disabled devices */
if (!thr->cgpu->enabled)
if (thr->cgpu->deven == DEV_DISABLED)
continue;
thr->pause = false;
tq_push(thr->q, &ping);
@ -3739,7 +3741,7 @@ static void *watchdog_thread(void __maybe_unused *userdata) @@ -3739,7 +3741,7 @@ static void *watchdog_thread(void __maybe_unused *userdata)
for (i = 0; i < total_devices; ++i) {
struct cgpu_info *cgpu = devices[i];
struct thr_info *thr = cgpu->thread;
bool *enable;
enum dev_enable *denable;
int gpu;
if (cgpu->api != &opencl_api)
@ -3748,10 +3750,10 @@ static void *watchdog_thread(void __maybe_unused *userdata) @@ -3748,10 +3750,10 @@ static void *watchdog_thread(void __maybe_unused *userdata)
if (i >= nDevs)
break;
gpu = thr->cgpu->device_id;
enable = &cgpu->enabled;
denable = &cgpu->deven;
#ifdef HAVE_ADL
if (adl_active && gpus[gpu].has_adl)
gpu_autotune(gpu, enable);
gpu_autotune(gpu, denable);
if (opt_debug && gpus[gpu].has_adl) {
int engineclock = 0, memclock = 0, activity = 0, fanspeed = 0, fanpercent = 0, powertune = 0;
float temp = 0, vddc = 0;
@ -3762,7 +3764,7 @@ static void *watchdog_thread(void __maybe_unused *userdata) @@ -3762,7 +3764,7 @@ static void *watchdog_thread(void __maybe_unused *userdata)
}
#endif
/* Thread is waiting on getwork or disabled */
if (thr->getwork || !*enable)
if (thr->getwork || *denable == DEV_DISABLED)
continue;
if (gpus[gpu].status != LIFE_WELL && now.tv_sec - thr->last.tv_sec < 60) {
@ -3880,7 +3882,7 @@ static void print_summary(void) @@ -3880,7 +3882,7 @@ static void print_summary(void)
applog(LOG_WARNING, "Summary of per device statistics:\n");
for (i = 0; i < total_devices; ++i) {
if (devices[i]->enabled)
if (devices[i]->deven == DEV_ENABLED)
log_print_status(devices[i]);
}
@ -4130,7 +4132,7 @@ static int cgminer_id_count = 0; @@ -4130,7 +4132,7 @@ static int cgminer_id_count = 0;
void enable_device(struct cgpu_info *cgpu)
{
cgpu->enabled = true;
cgpu->deven = DEV_ENABLED;
devices[cgpu->cgminer_id = cgminer_id_count++] = cgpu;
mining_threads += cgpu->threads;
#ifdef HAVE_OPENCL
@ -4306,7 +4308,7 @@ int main (int argc, char *argv[]) @@ -4306,7 +4308,7 @@ int main (int argc, char *argv[])
} else {
enable_device(devices[i]);
}
devices[i]->enabled = false;
devices[i]->deven = DEV_DISABLED;
}
}
total_devices = cgminer_id_count;
@ -4488,7 +4490,7 @@ int main (int argc, char *argv[]) @@ -4488,7 +4490,7 @@ int main (int argc, char *argv[])
/* Enable threads for devices set not to mine but disable
* their queue in case we wish to enable them later */
if (cgpu->enabled) {
if (cgpu->deven != DEV_DISABLED) {
applog(LOG_DEBUG, "Pushing ping to thread %d", thr->id);
tq_push(thr->q, &ping);

2
device-cpu.c

@ -742,7 +742,7 @@ static void cpu_detect() @@ -742,7 +742,7 @@ static void cpu_detect()
cgpu = devices[total_devices + i] = &cpus[i];
cgpu->api = &cpu_api;
cgpu->enabled = true;
cgpu->deven = DEV_ENABLED;
cgpu->device_id = i;
cgpu->threads = 1;
}

22
device-gpu.c

@ -430,7 +430,7 @@ void pause_dynamic_threads(int gpu) @@ -430,7 +430,7 @@ void pause_dynamic_threads(int gpu)
}
thr->pause = cgpu->dynamic;
if (!cgpu->dynamic && cgpu->enabled)
if (!cgpu->dynamic && cgpu->deven != DEV_DISABLED)
tq_push(thr->q, &ping);
}
}
@ -505,7 +505,7 @@ retry: @@ -505,7 +505,7 @@ retry:
if (thr->cgpu != cgpu)
continue;
get_datestamp(checkin, &thr->last);
wlog("Thread %d: %.1f Mh/s %s ", i, thr->rolling, cgpu->enabled ? "Enabled" : "Disabled");
wlog("Thread %d: %.1f Mh/s %s ", i, thr->rolling, cgpu->deven != DEV_DISABLED ? "Enabled" : "Disabled");
switch (cgpu->status) {
default:
case LIFE_WELL:
@ -546,11 +546,11 @@ retry: @@ -546,11 +546,11 @@ retry:
wlogprint("Invalid selection\n");
goto retry;
}
if (gpus[selected].enabled) {
if (gpus[selected].deven != DEV_DISABLED) {
wlogprint("Device already enabled\n");
goto retry;
}
gpus[selected].enabled = true;
gpus[selected].deven = DEV_ENABLED;
for (i = 0; i < mining_threads; ++i) {
thr = &thr_info[i];
cgpu = thr->cgpu;
@ -560,7 +560,7 @@ retry: @@ -560,7 +560,7 @@ retry:
continue;
if (cgpu->status != LIFE_WELL) {
wlogprint("Must restart device before enabling it");
gpus[selected].enabled = false;
gpus[selected].deven = DEV_DISABLED;
goto retry;
}
applog(LOG_DEBUG, "Pushing ping to thread %d", thr->id);
@ -575,11 +575,11 @@ retry: @@ -575,11 +575,11 @@ retry:
wlogprint("Invalid selection\n");
goto retry;
}
if (!gpus[selected].enabled) {
if (gpus[selected].deven == DEV_DISABLED) {
wlogprint("Device already disabled\n");
goto retry;
}
gpus[selected].enabled = false;
gpus[selected].deven = DEV_DISABLED;
goto retry;
} else if (!strncasecmp(&input, "i", 1)) {
int intensity;
@ -887,7 +887,7 @@ select_cgpu: @@ -887,7 +887,7 @@ select_cgpu:
}
gpu = cgpu->device_id;
cgpu->enabled = false;
cgpu->deven = DEV_DISABLED;
for (thr_id = 0; thr_id < mining_threads; ++thr_id) {
thr = &thr_info[thr_id];
@ -912,7 +912,7 @@ select_cgpu: @@ -912,7 +912,7 @@ select_cgpu:
applog(LOG_WARNING, "Thread %d no longer exists", thr_id);
}
cgpu->enabled = true;
cgpu->deven = DEV_ENABLED;
for (thr_id = 0; thr_id < mining_threads; ++thr_id) {
int virtual_gpu;
@ -1016,7 +1016,7 @@ static void opencl_detect() @@ -1016,7 +1016,7 @@ static void opencl_detect()
struct cgpu_info *cgpu;
cgpu = devices[total_devices++] = &gpus[i];
cgpu->enabled = true;
cgpu->deven = DEV_ENABLED;
cgpu->api = &opencl_api;
cgpu->device_id = i;
cgpu->threads = opt_g_threads;
@ -1105,7 +1105,7 @@ static bool opencl_thread_prepare(struct thr_info *thr) @@ -1105,7 +1105,7 @@ static bool opencl_thread_prepare(struct thr_info *thr)
free(buf);
}
}
cgpu->enabled = false;
cgpu->deven = DEV_DISABLED;
cgpu->status = LIFE_NOSTART;
return false;
}

9
miner.h

@ -207,6 +207,12 @@ struct device_api { @@ -207,6 +207,12 @@ struct device_api {
void (*thread_shutdown)(struct thr_info*);
};
/*
 * Per-device run state, stored in cgpu_info.deven in place of the old
 * boolean "enabled" flag.
 *
 * The numeric values are spelled out so it is obvious that a zero-filled
 * cgpu_info starts out as DEV_ENABLED.
 */
enum dev_enable {
	/* Device is allowed to mine. */
	DEV_ENABLED = 0,

	/* Device has been switched off (e.g. via the API or the GPU menu). */
	DEV_DISABLED = 1,

	/*
	 * Device exceeded its thermal cutoff and is resting.  gpu_autotune()
	 * moves it back to DEV_ENABLED once the temperature drops below the
	 * target, but only while opt_restart is set.
	 */
	DEV_RECOVER = 2,
};
struct cgpu_info {
int cgminer_id;
struct device_api *api;
@ -215,7 +221,7 @@ struct cgpu_info { @@ -215,7 +221,7 @@ struct cgpu_info {
FILE *device_file;
int device_fd;
bool enabled;
enum dev_enable deven;
int accepted;
int rejected;
int hw_errors;
@ -398,6 +404,7 @@ extern int opt_api_port; @@ -398,6 +404,7 @@ extern int opt_api_port;
extern bool opt_api_listen;
extern bool opt_api_network;
extern bool opt_delaynet;
extern bool opt_restart;
extern pthread_rwlock_t netacc_lock;

Loading…
Cancel
Save