Browse Source

Allow devices that are disabled due to overheating to be flagged as recovering instead of disabling them, and re-enable them if they're below ideal temperatures and --no-restart has not been set.
nfactor-troky
Con Kolivas 13 years ago
parent
commit
02295c69c8
  1. 11
      adl.c
  2. 2
      adl.h
  3. 10
      api.c
  4. 4
      bitforce.c
  5. 36
      cgminer.c
  6. 2
      device-cpu.c
  7. 22
      device-gpu.c
  8. 9
      miner.h

11
adl.c

@ -330,7 +330,7 @@ void init_adl(int nDevs)
continue; continue;
} }
if (!gpus[gpu].enabled) { if (gpus[gpu].deven == DEV_DISABLED) {
gpus[i].gpu_engine = gpus[i].gpu_engine =
gpus[i].gpu_memclock = gpus[i].gpu_memclock =
gpus[i].gpu_vddc = gpus[i].gpu_vddc =
@ -1025,7 +1025,7 @@ static void fan_autotune(int gpu, int temp, int fanpercent, bool __maybe_unused
} }
} }
void gpu_autotune(int gpu, bool *enable) void gpu_autotune(int gpu, enum dev_enable *denable)
{ {
int temp, fanpercent, engine, newengine, twintemp = 0; int temp, fanpercent, engine, newengine, twintemp = 0;
bool fan_optimal = true; bool fan_optimal = true;
@ -1068,7 +1068,7 @@ void gpu_autotune(int gpu, bool *enable)
if (engine && ga->autoengine) { if (engine && ga->autoengine) {
if (temp > cgpu->cutofftemp) { if (temp > cgpu->cutofftemp) {
applog(LOG_WARNING, "Hit thermal cutoff limit on GPU %d, disabling!", gpu); applog(LOG_WARNING, "Hit thermal cutoff limit on GPU %d, disabling!", gpu);
*enable = false; *denable = DEV_RECOVER;
newengine = ga->minspeed; newengine = ga->minspeed;
} else if (temp > ga->overtemp && engine > ga->minspeed) { } else if (temp > ga->overtemp && engine > ga->minspeed) {
applog(LOG_WARNING, "Overheat detected, decreasing GPU %d clock speed", gpu); applog(LOG_WARNING, "Overheat detected, decreasing GPU %d clock speed", gpu);
@ -1077,9 +1077,12 @@ void gpu_autotune(int gpu, bool *enable)
applog(LOG_DEBUG, "Temperature %d degrees over target, decreasing clock speed", opt_hysteresis); applog(LOG_DEBUG, "Temperature %d degrees over target, decreasing clock speed", opt_hysteresis);
newengine = engine - ga->lpOdParameters.sEngineClock.iStep; newengine = engine - ga->lpOdParameters.sEngineClock.iStep;
/* Only try to tune engine speed up if this GPU is not disabled */ /* Only try to tune engine speed up if this GPU is not disabled */
} else if (temp < ga->targettemp && engine < ga->maxspeed && *enable) { } else if (temp < ga->targettemp && engine < ga->maxspeed && *denable == DEV_ENABLED) {
applog(LOG_DEBUG, "Temperature below target, increasing clock speed"); applog(LOG_DEBUG, "Temperature below target, increasing clock speed");
newengine = engine + ga->lpOdParameters.sEngineClock.iStep; newengine = engine + ga->lpOdParameters.sEngineClock.iStep;
} else if (temp < ga->targettemp && *denable == DEV_RECOVER && opt_restart) {
applog(LOG_NOTICE, "Device recovered to temperature below target, re-enabling");
*denable = DEV_ENABLED;
} }
if (newengine > ga->maxspeed) if (newengine > ga->maxspeed)

2
adl.h

@ -17,7 +17,7 @@ int gpu_fanpercent(int gpu);
bool gpu_stats(int gpu, float *temp, int *engineclock, int *memclock, float *vddc, bool gpu_stats(int gpu, float *temp, int *engineclock, int *memclock, float *vddc,
int *activity, int *fanspeed, int *fanpercent, int *powertune); int *activity, int *fanspeed, int *fanpercent, int *powertune);
void change_gpusettings(int gpu); void change_gpusettings(int gpu);
void gpu_autotune(int gpu, bool *enable); void gpu_autotune(int gpu, enum dev_enable *denable);
void clear_adl(int nDevs); void clear_adl(int nDevs);
#else /* HAVE_ADL */ #else /* HAVE_ADL */
#define adl_active (0) #define adl_active (0)

10
api.c

@ -522,7 +522,7 @@ static void gpustatus(int gpu, bool isjson)
#endif #endif
gt = gv = gm = gc = ga = gf = gp = pt = 0; gt = gv = gm = gc = ga = gf = gp = pt = 0;
if (cgpu->enabled) if (cgpu->deven != DEV_DISABLED)
enabled = (char *)YES; enabled = (char *)YES;
else else
enabled = (char *)NO; enabled = (char *)NO;
@ -830,7 +830,7 @@ static void gpuenable(__maybe_unused SOCKETTYPE c, char *param, bool isjson)
return; return;
} }
if (gpus[id].enabled) { if (gpus[id].deven != DEV_DISABLED) {
strcpy(io_buffer, message(MSG_ALRENA, id, NULL, isjson)); strcpy(io_buffer, message(MSG_ALRENA, id, NULL, isjson));
return; return;
} }
@ -844,7 +844,7 @@ static void gpuenable(__maybe_unused SOCKETTYPE c, char *param, bool isjson)
return; return;
} }
gpus[id].enabled = true; gpus[id].deven = DEV_ENABLED;
tq_push(thr->q, &ping); tq_push(thr->q, &ping);
} }
@ -873,12 +873,12 @@ static void gpudisable(__maybe_unused SOCKETTYPE c, char *param, bool isjson)
return; return;
} }
if (!gpus[id].enabled) { if (gpus[id].deven == DEV_DISABLED) {
strcpy(io_buffer, message(MSG_ALRDIS, id, NULL, isjson)); strcpy(io_buffer, message(MSG_ALRDIS, id, NULL, isjson));
return; return;
} }
gpus[id].enabled = false; gpus[id].deven = DEV_DISABLED;
strcpy(io_buffer, message(MSG_GPUDIS, id, NULL, isjson)); strcpy(io_buffer, message(MSG_GPUDIS, id, NULL, isjson));
} }

4
bitforce.c

@ -119,7 +119,7 @@ static bool bitforce_detect_one(const char *devpath)
bitforce->api = &bitforce_api; bitforce->api = &bitforce_api;
bitforce->device_id = i++; bitforce->device_id = i++;
bitforce->device_path = strdup(devpath); bitforce->device_path = strdup(devpath);
bitforce->enabled = true; bitforce->deven = DEV_ENABLED;
bitforce->threads = 1; bitforce->threads = 1;
return true; return true;
@ -254,7 +254,7 @@ static uint64_t bitforce_scanhash(struct thr_info *thr, struct work *work, uint6
bitforce->temp = temp; bitforce->temp = temp;
if (temp > bitforce->cutofftemp) { if (temp > bitforce->cutofftemp) {
applog(LOG_WARNING, "Hit thermal cutoff limit on %s %d, disabling!", bitforce->api->name, bitforce->device_id); applog(LOG_WARNING, "Hit thermal cutoff limit on %s %d, disabling!", bitforce->api->name, bitforce->device_id);
bitforce->enabled = false; bitforce->deven = DEV_RECOVER;
} }
} }
} }

36
cgminer.c

@ -100,7 +100,7 @@ static const bool opt_time = true;
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
int opt_dynamic_interval = 7; int opt_dynamic_interval = 7;
static bool opt_restart = true; bool opt_restart = true;
static bool opt_nogpu; static bool opt_nogpu;
#endif #endif
@ -1223,10 +1223,12 @@ static void curses_print_devstatus(int thr_id)
wprintw(statuswin, "DEAD "); wprintw(statuswin, "DEAD ");
else if (cgpu->status == LIFE_SICK) else if (cgpu->status == LIFE_SICK)
wprintw(statuswin, "SICK "); wprintw(statuswin, "SICK ");
else if (!cgpu->enabled) else if (cgpu->deven == DEV_DISABLED)
wprintw(statuswin, "OFF "); wprintw(statuswin, "OFF ");
else else if (cgpu->deven == DEV_RECOVER)
wprintw(statuswin, "%5.1f", cgpu->rolling); wprintw(statuswin, "REST ");
else
wprintw(statuswin, "%5.1f", cgpu->rolling);
adj_width(cgpu->accepted, &awidth); adj_width(cgpu->accepted, &awidth);
adj_width(cgpu->rejected, &rwidth); adj_width(cgpu->rejected, &rwidth);
adj_width(cgpu->hw_errors, &hwwidth); adj_width(cgpu->hw_errors, &hwwidth);
@ -2380,11 +2382,11 @@ void write_config(FILE *fcfg)
if (opt_socks_proxy && *opt_socks_proxy) if (opt_socks_proxy && *opt_socks_proxy)
fprintf(fcfg, ",\n\"socks-proxy\" : \"%s\"", opt_socks_proxy); fprintf(fcfg, ",\n\"socks-proxy\" : \"%s\"", opt_socks_proxy);
for(i = 0; i < nDevs; i++) for(i = 0; i < nDevs; i++)
if (!gpus[i].enabled) if (gpus[i].deven == DEV_DISABLED)
break; break;
if (i < nDevs) if (i < nDevs)
for (i = 0; i < nDevs; i++) for (i = 0; i < nDevs; i++)
if (gpus[i].enabled) if (gpus[i].deven != DEV_DISABLED)
fprintf(fcfg, ",\n\"device\" : \"%d\"", i); fprintf(fcfg, ",\n\"device\" : \"%d\"", i);
if (opt_api_allow != NULL) if (opt_api_allow != NULL)
fprintf(fcfg, ",\n\"api-allow\" : \"%s\"", opt_api_allow); fprintf(fcfg, ",\n\"api-allow\" : \"%s\"", opt_api_allow);
@ -3401,7 +3403,7 @@ void *miner_thread(void *userdata)
tv_lastupdate = tv_end; tv_lastupdate = tv_end;
} }
if (unlikely(mythr->pause || !cgpu->enabled)) { if (unlikely(mythr->pause || cgpu->deven == DEV_DISABLED)) {
applog(LOG_WARNING, "Thread %d being disabled", thr_id); applog(LOG_WARNING, "Thread %d being disabled", thr_id);
mythr->rolling = mythr->cgpu->rolling = 0; mythr->rolling = mythr->cgpu->rolling = 0;
applog(LOG_DEBUG, "Popping wakeup ping in miner thread"); applog(LOG_DEBUG, "Popping wakeup ping in miner thread");
@ -3728,7 +3730,7 @@ static void *watchdog_thread(void __maybe_unused *userdata)
thr = &thr_info[i]; thr = &thr_info[i];
/* Don't touch disabled devices */ /* Don't touch disabled devices */
if (!thr->cgpu->enabled) if (thr->cgpu->deven == DEV_DISABLED)
continue; continue;
thr->pause = false; thr->pause = false;
tq_push(thr->q, &ping); tq_push(thr->q, &ping);
@ -3739,7 +3741,7 @@ static void *watchdog_thread(void __maybe_unused *userdata)
for (i = 0; i < total_devices; ++i) { for (i = 0; i < total_devices; ++i) {
struct cgpu_info *cgpu = devices[i]; struct cgpu_info *cgpu = devices[i];
struct thr_info *thr = cgpu->thread; struct thr_info *thr = cgpu->thread;
bool *enable; enum dev_enable *denable;
int gpu; int gpu;
if (cgpu->api != &opencl_api) if (cgpu->api != &opencl_api)
@ -3748,10 +3750,10 @@ static void *watchdog_thread(void __maybe_unused *userdata)
if (i >= nDevs) if (i >= nDevs)
break; break;
gpu = thr->cgpu->device_id; gpu = thr->cgpu->device_id;
enable = &cgpu->enabled; denable = &cgpu->deven;
#ifdef HAVE_ADL #ifdef HAVE_ADL
if (adl_active && gpus[gpu].has_adl) if (adl_active && gpus[gpu].has_adl)
gpu_autotune(gpu, enable); gpu_autotune(gpu, denable);
if (opt_debug && gpus[gpu].has_adl) { if (opt_debug && gpus[gpu].has_adl) {
int engineclock = 0, memclock = 0, activity = 0, fanspeed = 0, fanpercent = 0, powertune = 0; int engineclock = 0, memclock = 0, activity = 0, fanspeed = 0, fanpercent = 0, powertune = 0;
float temp = 0, vddc = 0; float temp = 0, vddc = 0;
@ -3762,7 +3764,7 @@ static void *watchdog_thread(void __maybe_unused *userdata)
} }
#endif #endif
/* Thread is waiting on getwork or disabled */ /* Thread is waiting on getwork or disabled */
if (thr->getwork || !*enable) if (thr->getwork || *denable == DEV_DISABLED)
continue; continue;
if (gpus[gpu].status != LIFE_WELL && now.tv_sec - thr->last.tv_sec < 60) { if (gpus[gpu].status != LIFE_WELL && now.tv_sec - thr->last.tv_sec < 60) {
@ -3880,7 +3882,7 @@ static void print_summary(void)
applog(LOG_WARNING, "Summary of per device statistics:\n"); applog(LOG_WARNING, "Summary of per device statistics:\n");
for (i = 0; i < total_devices; ++i) { for (i = 0; i < total_devices; ++i) {
if (devices[i]->enabled) if (devices[i]->deven == DEV_ENABLED)
log_print_status(devices[i]); log_print_status(devices[i]);
} }
@ -4130,7 +4132,7 @@ static int cgminer_id_count = 0;
void enable_device(struct cgpu_info *cgpu) void enable_device(struct cgpu_info *cgpu)
{ {
cgpu->enabled = true; cgpu->deven = DEV_ENABLED;
devices[cgpu->cgminer_id = cgminer_id_count++] = cgpu; devices[cgpu->cgminer_id = cgminer_id_count++] = cgpu;
mining_threads += cgpu->threads; mining_threads += cgpu->threads;
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
@ -4306,7 +4308,7 @@ int main (int argc, char *argv[])
} else { } else {
enable_device(devices[i]); enable_device(devices[i]);
} }
devices[i]->enabled = false; devices[i]->deven = DEV_DISABLED;
} }
} }
total_devices = cgminer_id_count; total_devices = cgminer_id_count;
@ -4488,7 +4490,7 @@ int main (int argc, char *argv[])
/* Enable threads for devices set not to mine but disable /* Enable threads for devices set not to mine but disable
* their queue in case we wish to enable them later */ * their queue in case we wish to enable them later */
if (cgpu->enabled) { if (cgpu->deven != DEV_DISABLED) {
applog(LOG_DEBUG, "Pushing ping to thread %d", thr->id); applog(LOG_DEBUG, "Pushing ping to thread %d", thr->id);
tq_push(thr->q, &ping); tq_push(thr->q, &ping);

2
device-cpu.c

@ -742,7 +742,7 @@ static void cpu_detect()
cgpu = devices[total_devices + i] = &cpus[i]; cgpu = devices[total_devices + i] = &cpus[i];
cgpu->api = &cpu_api; cgpu->api = &cpu_api;
cgpu->enabled = true; cgpu->deven = DEV_ENABLED;
cgpu->device_id = i; cgpu->device_id = i;
cgpu->threads = 1; cgpu->threads = 1;
} }

22
device-gpu.c

@ -430,7 +430,7 @@ void pause_dynamic_threads(int gpu)
} }
thr->pause = cgpu->dynamic; thr->pause = cgpu->dynamic;
if (!cgpu->dynamic && cgpu->enabled) if (!cgpu->dynamic && cgpu->deven != DEV_DISABLED)
tq_push(thr->q, &ping); tq_push(thr->q, &ping);
} }
} }
@ -505,7 +505,7 @@ retry:
if (thr->cgpu != cgpu) if (thr->cgpu != cgpu)
continue; continue;
get_datestamp(checkin, &thr->last); get_datestamp(checkin, &thr->last);
wlog("Thread %d: %.1f Mh/s %s ", i, thr->rolling, cgpu->enabled ? "Enabled" : "Disabled"); wlog("Thread %d: %.1f Mh/s %s ", i, thr->rolling, cgpu->deven != DEV_DISABLED ? "Enabled" : "Disabled");
switch (cgpu->status) { switch (cgpu->status) {
default: default:
case LIFE_WELL: case LIFE_WELL:
@ -546,11 +546,11 @@ retry:
wlogprint("Invalid selection\n"); wlogprint("Invalid selection\n");
goto retry; goto retry;
} }
if (gpus[selected].enabled) { if (gpus[selected].deven != DEV_DISABLED) {
wlogprint("Device already enabled\n"); wlogprint("Device already enabled\n");
goto retry; goto retry;
} }
gpus[selected].enabled = true; gpus[selected].deven = DEV_ENABLED;
for (i = 0; i < mining_threads; ++i) { for (i = 0; i < mining_threads; ++i) {
thr = &thr_info[i]; thr = &thr_info[i];
cgpu = thr->cgpu; cgpu = thr->cgpu;
@ -560,7 +560,7 @@ retry:
continue; continue;
if (cgpu->status != LIFE_WELL) { if (cgpu->status != LIFE_WELL) {
wlogprint("Must restart device before enabling it"); wlogprint("Must restart device before enabling it");
gpus[selected].enabled = false; gpus[selected].deven = DEV_DISABLED;
goto retry; goto retry;
} }
applog(LOG_DEBUG, "Pushing ping to thread %d", thr->id); applog(LOG_DEBUG, "Pushing ping to thread %d", thr->id);
@ -575,11 +575,11 @@ retry:
wlogprint("Invalid selection\n"); wlogprint("Invalid selection\n");
goto retry; goto retry;
} }
if (!gpus[selected].enabled) { if (gpus[selected].deven == DEV_DISABLED) {
wlogprint("Device already disabled\n"); wlogprint("Device already disabled\n");
goto retry; goto retry;
} }
gpus[selected].enabled = false; gpus[selected].deven = DEV_DISABLED;
goto retry; goto retry;
} else if (!strncasecmp(&input, "i", 1)) { } else if (!strncasecmp(&input, "i", 1)) {
int intensity; int intensity;
@ -887,7 +887,7 @@ select_cgpu:
} }
gpu = cgpu->device_id; gpu = cgpu->device_id;
cgpu->enabled = false; cgpu->deven = DEV_DISABLED;
for (thr_id = 0; thr_id < mining_threads; ++thr_id) { for (thr_id = 0; thr_id < mining_threads; ++thr_id) {
thr = &thr_info[thr_id]; thr = &thr_info[thr_id];
@ -912,7 +912,7 @@ select_cgpu:
applog(LOG_WARNING, "Thread %d no longer exists", thr_id); applog(LOG_WARNING, "Thread %d no longer exists", thr_id);
} }
cgpu->enabled = true; cgpu->deven = DEV_ENABLED;
for (thr_id = 0; thr_id < mining_threads; ++thr_id) { for (thr_id = 0; thr_id < mining_threads; ++thr_id) {
int virtual_gpu; int virtual_gpu;
@ -1016,7 +1016,7 @@ static void opencl_detect()
struct cgpu_info *cgpu; struct cgpu_info *cgpu;
cgpu = devices[total_devices++] = &gpus[i]; cgpu = devices[total_devices++] = &gpus[i];
cgpu->enabled = true; cgpu->deven = DEV_ENABLED;
cgpu->api = &opencl_api; cgpu->api = &opencl_api;
cgpu->device_id = i; cgpu->device_id = i;
cgpu->threads = opt_g_threads; cgpu->threads = opt_g_threads;
@ -1105,7 +1105,7 @@ static bool opencl_thread_prepare(struct thr_info *thr)
free(buf); free(buf);
} }
} }
cgpu->enabled = false; cgpu->deven = DEV_DISABLED;
cgpu->status = LIFE_NOSTART; cgpu->status = LIFE_NOSTART;
return false; return false;
} }

9
miner.h

@ -207,6 +207,12 @@ struct device_api {
void (*thread_shutdown)(struct thr_info*); void (*thread_shutdown)(struct thr_info*);
}; };
enum dev_enable {
DEV_ENABLED,
DEV_DISABLED,
DEV_RECOVER,
};
struct cgpu_info { struct cgpu_info {
int cgminer_id; int cgminer_id;
struct device_api *api; struct device_api *api;
@ -215,7 +221,7 @@ struct cgpu_info {
FILE *device_file; FILE *device_file;
int device_fd; int device_fd;
bool enabled; enum dev_enable deven;
int accepted; int accepted;
int rejected; int rejected;
int hw_errors; int hw_errors;
@ -398,6 +404,7 @@ extern int opt_api_port;
extern bool opt_api_listen; extern bool opt_api_listen;
extern bool opt_api_network; extern bool opt_api_network;
extern bool opt_delaynet; extern bool opt_delaynet;
extern bool opt_restart;
extern pthread_rwlock_t netacc_lock; extern pthread_rwlock_t netacc_lock;

Loading…
Cancel
Save