mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-08 22:08:02 +00:00
Flag devices that hit the thermal cutoff as recovering instead of disabling them outright, and re-enable them once they drop back below the target
temperature, provided --no-restart has not been set.
This commit is contained in:
parent
e9c3d7309a
commit
02295c69c8
11
adl.c
11
adl.c
@ -330,7 +330,7 @@ void init_adl(int nDevs)
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!gpus[gpu].enabled) {
|
||||
if (gpus[gpu].deven == DEV_DISABLED) {
|
||||
gpus[i].gpu_engine =
|
||||
gpus[i].gpu_memclock =
|
||||
gpus[i].gpu_vddc =
|
||||
@ -1025,7 +1025,7 @@ static void fan_autotune(int gpu, int temp, int fanpercent, bool __maybe_unused
|
||||
}
|
||||
}
|
||||
|
||||
void gpu_autotune(int gpu, bool *enable)
|
||||
void gpu_autotune(int gpu, enum dev_enable *denable)
|
||||
{
|
||||
int temp, fanpercent, engine, newengine, twintemp = 0;
|
||||
bool fan_optimal = true;
|
||||
@ -1068,7 +1068,7 @@ void gpu_autotune(int gpu, bool *enable)
|
||||
if (engine && ga->autoengine) {
|
||||
if (temp > cgpu->cutofftemp) {
|
||||
applog(LOG_WARNING, "Hit thermal cutoff limit on GPU %d, disabling!", gpu);
|
||||
*enable = false;
|
||||
*denable = DEV_RECOVER;
|
||||
newengine = ga->minspeed;
|
||||
} else if (temp > ga->overtemp && engine > ga->minspeed) {
|
||||
applog(LOG_WARNING, "Overheat detected, decreasing GPU %d clock speed", gpu);
|
||||
@ -1077,9 +1077,12 @@ void gpu_autotune(int gpu, bool *enable)
|
||||
applog(LOG_DEBUG, "Temperature %d degrees over target, decreasing clock speed", opt_hysteresis);
|
||||
newengine = engine - ga->lpOdParameters.sEngineClock.iStep;
|
||||
/* Only try to tune engine speed up if this GPU is not disabled */
|
||||
} else if (temp < ga->targettemp && engine < ga->maxspeed && *enable) {
|
||||
} else if (temp < ga->targettemp && engine < ga->maxspeed && *denable == DEV_ENABLED) {
|
||||
applog(LOG_DEBUG, "Temperature below target, increasing clock speed");
|
||||
newengine = engine + ga->lpOdParameters.sEngineClock.iStep;
|
||||
} else if (temp < ga->targettemp && *denable == DEV_RECOVER && opt_restart) {
|
||||
applog(LOG_NOTICE, "Device recovered to temperature below target, re-enabling");
|
||||
*denable = DEV_ENABLED;
|
||||
}
|
||||
|
||||
if (newengine > ga->maxspeed)
|
||||
|
2
adl.h
2
adl.h
@ -17,7 +17,7 @@ int gpu_fanpercent(int gpu);
|
||||
bool gpu_stats(int gpu, float *temp, int *engineclock, int *memclock, float *vddc,
|
||||
int *activity, int *fanspeed, int *fanpercent, int *powertune);
|
||||
void change_gpusettings(int gpu);
|
||||
void gpu_autotune(int gpu, bool *enable);
|
||||
void gpu_autotune(int gpu, enum dev_enable *denable);
|
||||
void clear_adl(int nDevs);
|
||||
#else /* HAVE_ADL */
|
||||
#define adl_active (0)
|
||||
|
10
api.c
10
api.c
@ -522,7 +522,7 @@ static void gpustatus(int gpu, bool isjson)
|
||||
#endif
|
||||
gt = gv = gm = gc = ga = gf = gp = pt = 0;
|
||||
|
||||
if (cgpu->enabled)
|
||||
if (cgpu->deven != DEV_DISABLED)
|
||||
enabled = (char *)YES;
|
||||
else
|
||||
enabled = (char *)NO;
|
||||
@ -830,7 +830,7 @@ static void gpuenable(__maybe_unused SOCKETTYPE c, char *param, bool isjson)
|
||||
return;
|
||||
}
|
||||
|
||||
if (gpus[id].enabled) {
|
||||
if (gpus[id].deven != DEV_DISABLED) {
|
||||
strcpy(io_buffer, message(MSG_ALRENA, id, NULL, isjson));
|
||||
return;
|
||||
}
|
||||
@ -844,7 +844,7 @@ static void gpuenable(__maybe_unused SOCKETTYPE c, char *param, bool isjson)
|
||||
return;
|
||||
}
|
||||
|
||||
gpus[id].enabled = true;
|
||||
gpus[id].deven = DEV_ENABLED;
|
||||
tq_push(thr->q, &ping);
|
||||
|
||||
}
|
||||
@ -873,12 +873,12 @@ static void gpudisable(__maybe_unused SOCKETTYPE c, char *param, bool isjson)
|
||||
return;
|
||||
}
|
||||
|
||||
if (!gpus[id].enabled) {
|
||||
if (gpus[id].deven == DEV_DISABLED) {
|
||||
strcpy(io_buffer, message(MSG_ALRDIS, id, NULL, isjson));
|
||||
return;
|
||||
}
|
||||
|
||||
gpus[id].enabled = false;
|
||||
gpus[id].deven = DEV_DISABLED;
|
||||
|
||||
strcpy(io_buffer, message(MSG_GPUDIS, id, NULL, isjson));
|
||||
}
|
||||
|
@ -119,7 +119,7 @@ static bool bitforce_detect_one(const char *devpath)
|
||||
bitforce->api = &bitforce_api;
|
||||
bitforce->device_id = i++;
|
||||
bitforce->device_path = strdup(devpath);
|
||||
bitforce->enabled = true;
|
||||
bitforce->deven = DEV_ENABLED;
|
||||
bitforce->threads = 1;
|
||||
|
||||
return true;
|
||||
@ -254,7 +254,7 @@ static uint64_t bitforce_scanhash(struct thr_info *thr, struct work *work, uint6
|
||||
bitforce->temp = temp;
|
||||
if (temp > bitforce->cutofftemp) {
|
||||
applog(LOG_WARNING, "Hit thermal cutoff limit on %s %d, disabling!", bitforce->api->name, bitforce->device_id);
|
||||
bitforce->enabled = false;
|
||||
bitforce->deven = DEV_RECOVER;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
36
cgminer.c
36
cgminer.c
@ -100,7 +100,7 @@ static const bool opt_time = true;
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
int opt_dynamic_interval = 7;
|
||||
static bool opt_restart = true;
|
||||
bool opt_restart = true;
|
||||
static bool opt_nogpu;
|
||||
#endif
|
||||
|
||||
@ -1223,10 +1223,12 @@ static void curses_print_devstatus(int thr_id)
|
||||
wprintw(statuswin, "DEAD ");
|
||||
else if (cgpu->status == LIFE_SICK)
|
||||
wprintw(statuswin, "SICK ");
|
||||
else if (!cgpu->enabled)
|
||||
wprintw(statuswin, "OFF ");
|
||||
else
|
||||
wprintw(statuswin, "%5.1f", cgpu->rolling);
|
||||
else if (cgpu->deven == DEV_DISABLED)
|
||||
wprintw(statuswin, "OFF ");
|
||||
else if (cgpu->deven == DEV_RECOVER)
|
||||
wprintw(statuswin, "REST ");
|
||||
else
|
||||
wprintw(statuswin, "%5.1f", cgpu->rolling);
|
||||
adj_width(cgpu->accepted, &awidth);
|
||||
adj_width(cgpu->rejected, &rwidth);
|
||||
adj_width(cgpu->hw_errors, &hwwidth);
|
||||
@ -2380,11 +2382,11 @@ void write_config(FILE *fcfg)
|
||||
if (opt_socks_proxy && *opt_socks_proxy)
|
||||
fprintf(fcfg, ",\n\"socks-proxy\" : \"%s\"", opt_socks_proxy);
|
||||
for(i = 0; i < nDevs; i++)
|
||||
if (!gpus[i].enabled)
|
||||
if (gpus[i].deven == DEV_DISABLED)
|
||||
break;
|
||||
if (i < nDevs)
|
||||
for (i = 0; i < nDevs; i++)
|
||||
if (gpus[i].enabled)
|
||||
if (gpus[i].deven != DEV_DISABLED)
|
||||
fprintf(fcfg, ",\n\"device\" : \"%d\"", i);
|
||||
if (opt_api_allow != NULL)
|
||||
fprintf(fcfg, ",\n\"api-allow\" : \"%s\"", opt_api_allow);
|
||||
@ -3401,7 +3403,7 @@ void *miner_thread(void *userdata)
|
||||
tv_lastupdate = tv_end;
|
||||
}
|
||||
|
||||
if (unlikely(mythr->pause || !cgpu->enabled)) {
|
||||
if (unlikely(mythr->pause || cgpu->deven == DEV_DISABLED)) {
|
||||
applog(LOG_WARNING, "Thread %d being disabled", thr_id);
|
||||
mythr->rolling = mythr->cgpu->rolling = 0;
|
||||
applog(LOG_DEBUG, "Popping wakeup ping in miner thread");
|
||||
@ -3728,7 +3730,7 @@ static void *watchdog_thread(void __maybe_unused *userdata)
|
||||
thr = &thr_info[i];
|
||||
|
||||
/* Don't touch disabled devices */
|
||||
if (!thr->cgpu->enabled)
|
||||
if (thr->cgpu->deven == DEV_DISABLED)
|
||||
continue;
|
||||
thr->pause = false;
|
||||
tq_push(thr->q, &ping);
|
||||
@ -3739,7 +3741,7 @@ static void *watchdog_thread(void __maybe_unused *userdata)
|
||||
for (i = 0; i < total_devices; ++i) {
|
||||
struct cgpu_info *cgpu = devices[i];
|
||||
struct thr_info *thr = cgpu->thread;
|
||||
bool *enable;
|
||||
enum dev_enable *denable;
|
||||
int gpu;
|
||||
|
||||
if (cgpu->api != &opencl_api)
|
||||
@ -3748,10 +3750,10 @@ static void *watchdog_thread(void __maybe_unused *userdata)
|
||||
if (i >= nDevs)
|
||||
break;
|
||||
gpu = thr->cgpu->device_id;
|
||||
enable = &cgpu->enabled;
|
||||
denable = &cgpu->deven;
|
||||
#ifdef HAVE_ADL
|
||||
if (adl_active && gpus[gpu].has_adl)
|
||||
gpu_autotune(gpu, enable);
|
||||
gpu_autotune(gpu, denable);
|
||||
if (opt_debug && gpus[gpu].has_adl) {
|
||||
int engineclock = 0, memclock = 0, activity = 0, fanspeed = 0, fanpercent = 0, powertune = 0;
|
||||
float temp = 0, vddc = 0;
|
||||
@ -3762,7 +3764,7 @@ static void *watchdog_thread(void __maybe_unused *userdata)
|
||||
}
|
||||
#endif
|
||||
/* Thread is waiting on getwork or disabled */
|
||||
if (thr->getwork || !*enable)
|
||||
if (thr->getwork || *denable == DEV_DISABLED)
|
||||
continue;
|
||||
|
||||
if (gpus[gpu].status != LIFE_WELL && now.tv_sec - thr->last.tv_sec < 60) {
|
||||
@ -3880,7 +3882,7 @@ static void print_summary(void)
|
||||
|
||||
applog(LOG_WARNING, "Summary of per device statistics:\n");
|
||||
for (i = 0; i < total_devices; ++i) {
|
||||
if (devices[i]->enabled)
|
||||
if (devices[i]->deven == DEV_ENABLED)
|
||||
log_print_status(devices[i]);
|
||||
}
|
||||
|
||||
@ -4130,7 +4132,7 @@ static int cgminer_id_count = 0;
|
||||
|
||||
void enable_device(struct cgpu_info *cgpu)
|
||||
{
|
||||
cgpu->enabled = true;
|
||||
cgpu->deven = DEV_ENABLED;
|
||||
devices[cgpu->cgminer_id = cgminer_id_count++] = cgpu;
|
||||
mining_threads += cgpu->threads;
|
||||
#ifdef HAVE_OPENCL
|
||||
@ -4306,7 +4308,7 @@ int main (int argc, char *argv[])
|
||||
} else {
|
||||
enable_device(devices[i]);
|
||||
}
|
||||
devices[i]->enabled = false;
|
||||
devices[i]->deven = DEV_DISABLED;
|
||||
}
|
||||
}
|
||||
total_devices = cgminer_id_count;
|
||||
@ -4488,7 +4490,7 @@ int main (int argc, char *argv[])
|
||||
|
||||
/* Enable threads for devices set not to mine but disable
|
||||
* their queue in case we wish to enable them later */
|
||||
if (cgpu->enabled) {
|
||||
if (cgpu->deven != DEV_DISABLED) {
|
||||
applog(LOG_DEBUG, "Pushing ping to thread %d", thr->id);
|
||||
|
||||
tq_push(thr->q, &ping);
|
||||
|
@ -742,7 +742,7 @@ static void cpu_detect()
|
||||
|
||||
cgpu = devices[total_devices + i] = &cpus[i];
|
||||
cgpu->api = &cpu_api;
|
||||
cgpu->enabled = true;
|
||||
cgpu->deven = DEV_ENABLED;
|
||||
cgpu->device_id = i;
|
||||
cgpu->threads = 1;
|
||||
}
|
||||
|
22
device-gpu.c
22
device-gpu.c
@ -430,7 +430,7 @@ void pause_dynamic_threads(int gpu)
|
||||
}
|
||||
|
||||
thr->pause = cgpu->dynamic;
|
||||
if (!cgpu->dynamic && cgpu->enabled)
|
||||
if (!cgpu->dynamic && cgpu->deven != DEV_DISABLED)
|
||||
tq_push(thr->q, &ping);
|
||||
}
|
||||
}
|
||||
@ -505,7 +505,7 @@ retry:
|
||||
if (thr->cgpu != cgpu)
|
||||
continue;
|
||||
get_datestamp(checkin, &thr->last);
|
||||
wlog("Thread %d: %.1f Mh/s %s ", i, thr->rolling, cgpu->enabled ? "Enabled" : "Disabled");
|
||||
wlog("Thread %d: %.1f Mh/s %s ", i, thr->rolling, cgpu->deven != DEV_DISABLED ? "Enabled" : "Disabled");
|
||||
switch (cgpu->status) {
|
||||
default:
|
||||
case LIFE_WELL:
|
||||
@ -546,11 +546,11 @@ retry:
|
||||
wlogprint("Invalid selection\n");
|
||||
goto retry;
|
||||
}
|
||||
if (gpus[selected].enabled) {
|
||||
if (gpus[selected].deven != DEV_DISABLED) {
|
||||
wlogprint("Device already enabled\n");
|
||||
goto retry;
|
||||
}
|
||||
gpus[selected].enabled = true;
|
||||
gpus[selected].deven = DEV_ENABLED;
|
||||
for (i = 0; i < mining_threads; ++i) {
|
||||
thr = &thr_info[i];
|
||||
cgpu = thr->cgpu;
|
||||
@ -560,7 +560,7 @@ retry:
|
||||
continue;
|
||||
if (cgpu->status != LIFE_WELL) {
|
||||
wlogprint("Must restart device before enabling it");
|
||||
gpus[selected].enabled = false;
|
||||
gpus[selected].deven = DEV_DISABLED;
|
||||
goto retry;
|
||||
}
|
||||
applog(LOG_DEBUG, "Pushing ping to thread %d", thr->id);
|
||||
@ -575,11 +575,11 @@ retry:
|
||||
wlogprint("Invalid selection\n");
|
||||
goto retry;
|
||||
}
|
||||
if (!gpus[selected].enabled) {
|
||||
if (gpus[selected].deven == DEV_DISABLED) {
|
||||
wlogprint("Device already disabled\n");
|
||||
goto retry;
|
||||
}
|
||||
gpus[selected].enabled = false;
|
||||
gpus[selected].deven = DEV_DISABLED;
|
||||
goto retry;
|
||||
} else if (!strncasecmp(&input, "i", 1)) {
|
||||
int intensity;
|
||||
@ -887,7 +887,7 @@ select_cgpu:
|
||||
}
|
||||
|
||||
gpu = cgpu->device_id;
|
||||
cgpu->enabled = false;
|
||||
cgpu->deven = DEV_DISABLED;
|
||||
|
||||
for (thr_id = 0; thr_id < mining_threads; ++thr_id) {
|
||||
thr = &thr_info[thr_id];
|
||||
@ -912,7 +912,7 @@ select_cgpu:
|
||||
applog(LOG_WARNING, "Thread %d no longer exists", thr_id);
|
||||
}
|
||||
|
||||
cgpu->enabled = true;
|
||||
cgpu->deven = DEV_ENABLED;
|
||||
|
||||
for (thr_id = 0; thr_id < mining_threads; ++thr_id) {
|
||||
int virtual_gpu;
|
||||
@ -1016,7 +1016,7 @@ static void opencl_detect()
|
||||
struct cgpu_info *cgpu;
|
||||
|
||||
cgpu = devices[total_devices++] = &gpus[i];
|
||||
cgpu->enabled = true;
|
||||
cgpu->deven = DEV_ENABLED;
|
||||
cgpu->api = &opencl_api;
|
||||
cgpu->device_id = i;
|
||||
cgpu->threads = opt_g_threads;
|
||||
@ -1105,7 +1105,7 @@ static bool opencl_thread_prepare(struct thr_info *thr)
|
||||
free(buf);
|
||||
}
|
||||
}
|
||||
cgpu->enabled = false;
|
||||
cgpu->deven = DEV_DISABLED;
|
||||
cgpu->status = LIFE_NOSTART;
|
||||
return false;
|
||||
}
|
||||
|
9
miner.h
9
miner.h
@ -207,6 +207,12 @@ struct device_api {
|
||||
void (*thread_shutdown)(struct thr_info*);
|
||||
};
|
||||
|
||||
/* Enable state of a mining device; stored in cgpu_info.deven, where it
 * replaces the former boolean `enabled` field.
 *
 * DEV_ENABLED  - normal state; assigned at detection, by enable_device()
 *                and by the API/menu "enable" actions.
 * DEV_DISABLED - device is switched off (API/menu "disable", device not
 *                selected at startup, or OpenCL thread preparation failed);
 *                the miner thread pauses and the watchdog skips the device.
 * DEV_RECOVER  - device exceeded its cutoff temperature; gpu_autotune()
 *                puts it back to DEV_ENABLED once the temperature is below
 *                the target and opt_restart is set. Displayed as "REST" in
 *                the curses device status.
 */
enum dev_enable {
|
||||
DEV_ENABLED,
|
||||
DEV_DISABLED,
|
||||
DEV_RECOVER,
|
||||
};
|
||||
|
||||
struct cgpu_info {
|
||||
int cgminer_id;
|
||||
struct device_api *api;
|
||||
@ -215,7 +221,7 @@ struct cgpu_info {
|
||||
FILE *device_file;
|
||||
int device_fd;
|
||||
|
||||
bool enabled;
|
||||
enum dev_enable deven;
|
||||
int accepted;
|
||||
int rejected;
|
||||
int hw_errors;
|
||||
@ -398,6 +404,7 @@ extern int opt_api_port;
|
||||
extern bool opt_api_listen;
|
||||
extern bool opt_api_network;
|
||||
extern bool opt_delaynet;
|
||||
extern bool opt_restart;
|
||||
|
||||
extern pthread_rwlock_t netacc_lock;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user