From 543265d66af56b17684a716ed6db13c383420430 Mon Sep 17 00:00:00 2001 From: Luke Dashjr Date: Tue, 24 Jan 2012 14:34:17 -0500 Subject: [PATCH] Generalize --temp-cutoff and implement support for reading temperature from BitFORCE FPGAs --- README | 2 +- adl.c | 12 ++++++------ adl.h | 1 - bitforce.c | 29 +++++++++++++++++++++++++++++ cgminer.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++--- device-gpu.c | 33 --------------------------------- device-gpu.h | 1 - miner.h | 7 ++++++- 8 files changed, 90 insertions(+), 46 deletions(-) diff --git a/README b/README index b592c4b1..719fc39e 100644 --- a/README +++ b/README @@ -153,6 +153,7 @@ Options for both config file and command line: --socks-proxy Set socks4 proxy (host:port) --submit-stale Submit shares even if they would normally be considered stale --syslog Use system log for output messages (default: standard error) +--temp-cutoff Temperature where a device will be automatically disabled, one value or comma separated list (default: 95) --text-only|-T Disable ncurses formatted screen output --url|-o URL for bitcoin JSON-RPC server --user|-u Username for bitcoin JSON-RPC server @@ -183,7 +184,6 @@ GPU only options: --kernel-path|-K Specify a path to where the kernel .cl files are (default: "/usr/local/bin") --kernel|-k Select kernel to use (poclbm or phatk - default: auto) --no-restart Do not attempt to restart GPUs that hang ---temp-cutoff Temperature where a GPU device will be automatically disabled, one value or comma separated list (default: 95) --temp-hysteresis Set how much the temperature can fluctuate outside limits when automanaging speeds (default: 3) --temp-overheat Overheat temperature when automatically managing fan and GPU speeds (default: 85) --temp-target Target temperature when automatically managing fan and GPU speeds (default: 75) diff --git a/adl.c b/adl.c index d1cd50e5..8b2b1ff8 100644 --- a/adl.c +++ b/adl.c @@ -26,7 +26,6 @@ bool opt_reorder = false; int opt_hysteresis = 3; const int opt_targettemp = 75; const int opt_overheattemp = 85; -const int opt_cutofftemp = 95; static pthread_mutex_t adl_lock; struct gpu_adapters { @@ -443,8 +442,8 @@ void init_adl(int nDevs) ga->targettemp = opt_targettemp; if (!ga->overtemp) ga->overtemp = opt_overheattemp; - if (!ga->cutofftemp) - ga->cutofftemp = opt_cutofftemp; + if (!gpus[gpu].cutofftemp) + gpus[gpu].cutofftemp = opt_cutofftemp; if (opt_autofan) { ga->autofan = true; /* Set a safe starting default if we're automanaging fan speeds */ @@ -503,6 +502,7 @@ float gpu_temp(int gpu) lock_adl(); ret = __gpu_temp(ga); unlock_adl(); + gpus[gpu].temp = ret; return ret; } @@ -1055,7 +1055,7 @@ void gpu_autotune(int gpu, bool *enable) } if (engine && ga->autoengine) { - if (temp > ga->cutofftemp) { + if (temp > cgpu->cutofftemp) { applog(LOG_WARNING, "Hit thermal cutoff limit on GPU %d, disabling!", gpu); *enable = false; newengine = ga->minspeed; @@ -1122,7 +1122,7 @@ void change_autosettings(int gpu) wlogprint("Target temperature: %d\n", ga->targettemp); wlogprint("Overheat temperature: %d\n", ga->overtemp); - wlogprint("Cutoff temperature: %d\n", ga->cutofftemp); + wlogprint("Cutoff temperature: %d\n", gpus[gpu].cutofftemp); wlogprint("Toggle [F]an auto [G]PU auto\nChange [T]arget [O]verheat [C]utoff\n"); wlogprint("Or press any other key to continue\n"); input = getch(); @@ -1159,7 +1159,7 @@ void change_autosettings(int gpu) if (val <= ga->overtemp || val > 200) wlogprint("Invalid temperature"); else - ga->cutofftemp = val; + gpus[gpu].cutofftemp = val; } } diff --git a/adl.h b/adl.h index 21567a80..5b98f7e4 100644 --- a/adl.h +++ b/adl.h @@ -6,7 +6,6 @@ bool opt_reorder; int opt_hysteresis; const int opt_targettemp; const int opt_overheattemp; -const int opt_cutofftemp; void init_adl(int nDevs); float gpu_temp(int gpu); int gpu_engineclock(int gpu); diff --git a/bitforce.c b/bitforce.c index 939455b8..005a632c 100644 --- a/bitforce.c +++ b/bitforce.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -146,6 +147,16 @@ static void bitforce_detect() bitforce_detect_auto(); } +static void get_bitforce_statline_before(char *buf, struct cgpu_info *bitforce) +{ + float gt = bitforce->temp; + if (gt > 0) + tailsprintf(buf, "%5.1fC ", gt); + else + tailsprintf(buf, " ", gt); + tailsprintf(buf, " | "); +} + static bool bitforce_thread_prepare(struct thr_info *thr) { struct cgpu_info *bitforce = thr->cgpu; @@ -221,6 +232,23 @@ static uint64_t bitforce_scanhash(struct thr_info *thr, struct work *work, uint6 return 0; } + BFwrite(fdDev, "ZKX", 3); + BFgets(pdevbuf, sizeof(pdevbuf), fdDev); + if (unlikely(!pdevbuf[0])) { + applog(LOG_ERR, "Error reading from BitForce (ZKX)"); + return 0; + } + if (!strncasecmp(pdevbuf, "TEMP:", 5)) { + float temp = strtof(pdevbuf + 5, NULL); + if (temp > 0) { + bitforce->temp = temp; + if (temp > bitforce->cutofftemp) { + applog(LOG_WARNING, "Hit thermal cutoff limit on %s %d, disabling!", bitforce->api->name, bitforce->device_id); + bitforce->enabled = false; + } + } + } + usleep(4500000); i = 4500; while (1) { @@ -267,6 +295,7 @@ struct device_api bitforce_api = { .name = "BFL", .api_detect = bitforce_detect, // .reinit_device = TODO + .get_statline_before = get_bitforce_statline_before, .thread_prepare = bitforce_thread_prepare, .scanhash = bitforce_scanhash, }; diff --git a/cgminer.c b/cgminer.c index 26a15802..57813918 100644 --- a/cgminer.c +++ b/cgminer.c @@ -90,6 +90,7 @@ bool use_syslog = false; static bool opt_quiet = false; static bool opt_realquiet = false; bool opt_loginput = false; +const int opt_cutofftemp = 95; static int opt_retries = -1; static int opt_fail_pause = 5; static int fail_pause = 5; @@ -537,6 +538,46 @@ static char *set_schedtime(const char *arg, struct schedtime *st) return NULL; } +static char *temp_cutoff_str = NULL; + +char *set_temp_cutoff(char *arg) +{ + int val; + + if (!(arg && arg[0])) + return "Invalid parameters for set temp cutoff"; + val = atoi(arg); + if (val < 0 || val > 200) + return "Invalid value passed to set temp cutoff"; + temp_cutoff_str = arg; + + return NULL; +} + +static void load_temp_cutoffs() +{ + int i, val = 0, device = 0; + char *nextptr; + + if (temp_cutoff_str) { + for (device = 0, nextptr = strtok(temp_cutoff_str, ","); nextptr; ++device, nextptr = strtok(NULL, ",")) { + if (device >= total_devices) + quit(1, "Too many values passed to set temp cutoff"); + val = atoi(nextptr); + if (val < 0 || val > 200) + quit(1, "Invalid value passed to set temp cutoff"); + + devices[device]->cutofftemp = val; + } + } + else + val = opt_cutofftemp; + if (device <= 1) { + for (i = device; i < total_devices; ++i) + devices[i]->cutofftemp = val; + } +} + static char *set_api_allow(const char *arg) { opt_set_charp(arg, &opt_api_allow); @@ -764,10 +805,12 @@ static struct opt_table opt_config_table[] = { opt_set_bool, &use_syslog, "Use system log for output messages (default: standard error)"), #endif -#ifdef HAVE_ADL +#if defined(HAVE_ADL) || defined(USE_BITFORCE) OPT_WITH_ARG("--temp-cutoff", set_temp_cutoff, opt_show_intval, &opt_cutofftemp, - "Temperature where a GPU device will be automatically disabled, one value or comma separated list"), + "Temperature where a device will be automatically disabled, one value or comma separated list"), +#endif +#ifdef HAVE_ADL OPT_WITH_ARG("--temp-hysteresis", set_int_1_to_10, opt_show_intval, &opt_hysteresis, "Set how much the temperature can fluctuate outside limits when automanaging speeds"), @@ -2237,7 +2280,7 @@ void write_config(FILE *fcfg) fprintf(fcfg, "%s%1.3f", i > 0 ? "," : "", gpus[i].gpu_vddc); fputs("\",\n\"temp-cutoff\" : \"", fcfg); for(i = 0; i < nDevs; i++) - fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].adl.cutofftemp); + fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].cutofftemp); fputs("\",\n\"temp-overheat\" : \"", fcfg); for(i = 0; i < nDevs; i++) fprintf(fcfg, "%s%d", i > 0 ? "," : "", gpus[i].adl.overtemp); @@ -4237,6 +4280,8 @@ int main (int argc, char *argv[]) if (!total_devices) quit(1, "All devices disabled, cannot mine!"); + load_temp_cutoffs(); + devcursor = 8; logstart = devcursor + total_devices + 1; logcursor = logstart + 1; diff --git a/device-gpu.c b/device-gpu.c index 0e8b0ed0..280434b7 100644 --- a/device-gpu.c +++ b/device-gpu.c @@ -269,39 +269,6 @@ char *set_gpu_vddc(char *arg) return NULL; } -char *set_temp_cutoff(char *arg) -{ - int i, val = 0, device = 0, *tco; - char *nextptr; - - nextptr = strtok(arg, ","); - if (nextptr == NULL) - return "Invalid parameters for set temp cutoff"; - val = atoi(nextptr); - if (val < 0 || val > 200) - return "Invalid value passed to set temp cutoff"; - - tco = &gpus[device++].adl.cutofftemp; - *tco = val; - - while ((nextptr = strtok(NULL, ",")) != NULL) { - val = atoi(nextptr); - if (val < 0 || val > 200) - return "Invalid value passed to set temp cutoff"; - - tco = &gpus[device++].adl.cutofftemp; - *tco = val; - } - if (device == 1) { - for (i = device; i < MAX_GPUDEVICES; i++) { - tco = &gpus[i].adl.cutofftemp; - *tco = val; - } - } - - return NULL; -} - char *set_temp_overheat(char *arg) { int i, val = 0, device = 0, *to; diff --git a/device-gpu.h b/device-gpu.h index 1e256137..84a855a1 100644 --- a/device-gpu.h +++ b/device-gpu.h @@ -12,7 +12,6 @@ extern char *set_gpu_memclock(char *arg); extern char *set_gpu_memdiff(char *arg); extern char *set_gpu_powertune(char *arg); extern char *set_gpu_vddc(char *arg); -extern char *set_temp_cutoff(char *arg); extern char *set_temp_overheat(char *arg); extern char *set_temp_target(char *arg); extern char *set_intensity(char *arg); diff --git a/miner.h b/miner.h index 63765a39..f1160e00 100644 --- a/miner.h +++ b/miner.h @@ -221,7 +221,6 @@ struct gpu_adl { int targetfan; int targettemp; int overtemp; - int cutofftemp; int minspeed; int maxspeed; @@ -280,6 +279,10 @@ struct cgpu_info { int virtual_gpu; bool dynamic; int intensity; + + float temp; + int cutofftemp; + #ifdef HAVE_ADL bool has_adl; struct gpu_adl adl; @@ -590,6 +593,7 @@ extern int total_accepted, total_rejected; extern int total_getworks, total_stale, total_discarded; extern unsigned int local_work; extern unsigned int total_go, total_ro; +extern const int opt_cutofftemp; extern int opt_log_interval; #ifdef HAVE_OPENCL @@ -686,6 +690,7 @@ enum cl_kernels { extern void get_datestamp(char *, struct timeval *); bool submit_nonce(struct thr_info *thr, struct work *work, uint32_t nonce); +extern void tailsprintf(char *f, const char *fmt, ...); extern void wlogprint(const char *f, ...); extern int curses_int(const char *query); extern char *curses_input(const char *query);