diff --git a/adl.c b/adl.c
index 6f973ff4..baa1076b 100644
--- a/adl.c
+++ b/adl.c
@@ -212,6 +212,15 @@ void init_adl(int nDevs)
 			continue;
 		}
 
+		if (!gpus[gpu].enabled) {
+			gpus[i].gpu_engine =
+			gpus[i].gpu_memclock =
+			gpus[i].gpu_vddc =
+			gpus[i].gpu_fan =
+			gpus[i].gpu_powertune = 0;
+			continue;
+		}
+
 		gpus[gpu].has_adl = true;
 		/* Flag adl as active if any card is successfully activated */
 		adl_active = true;
diff --git a/api.c b/api.c
index fe29b947..86c7c2d7 100644
--- a/api.c
+++ b/api.c
@@ -387,7 +387,7 @@ void gpustatus(int gpu, bool isjson)
 #endif
 		gt = gv = gm = gc = ga = gf = gp = pt = 0;
 
-		if (gpu_devices[gpu])
+		if (cgpu->enabled)
 			enabled = (char *)YES;
 		else
 			enabled = (char *)NO;
@@ -662,13 +662,13 @@ void gpuenable(SOCKETTYPE c, char *param, bool isjson)
 		return;
 	}
 
-	if (gpu_devices[id]) {
+	if (gpus[id].enabled) {
 		strcpy(io_buffer, message(MSG_ALRENA, id, isjson));
 		return;
 	}
 
 	for (i = 0; i < gpu_threads; i++) {
-		gpu = thr_info[i].cgpu->cpu_gpu;
+		gpu = thr_info[i].cgpu->device_id;
 		if (gpu == id) {
 			thr = &thr_info[i];
 			if (thr->cgpu->status != LIFE_WELL) {
@@ -676,7 +676,7 @@ void gpuenable(SOCKETTYPE c, char *param, bool isjson)
 				return;
 			}
 
-			gpu_devices[id] = true;
+			gpus[id].enabled = true;
 			tq_push(thr->q, &ping);
 		}
@@ -705,12 +705,12 @@ void gpudisable(SOCKETTYPE c, char *param, bool isjson)
 		return;
 	}
 
-	if (!gpu_devices[id]) {
+	if (!gpus[id].enabled) {
 		strcpy(io_buffer, message(MSG_ALRDIS, id, isjson));
 		return;
 	}
 
-	gpu_devices[id] = false;
+	gpus[id].enabled = false;
 
 	strcpy(io_buffer, message(MSG_GPUDIS, id, isjson));
 }
diff --git a/main.c b/main.c
index d98341e8..de4b27c9 100644
--- a/main.c
+++ b/main.c
@@ -1,6 +1,7 @@
 /*
  * Copyright 2011 Con Kolivas
+ * Copyright 2011 Luke Dashjr
  * Copyright 2010 Jeff Garzik
  *
  * This program is free software; you can redistribute it and/or modify it
@@ -145,7 +146,6 @@ const char *algo_names[] = {
 #endif
 };
 
-typedef void (*sha256_func)();
 static const sha256_func sha256_funcs[] = {
 	[ALGO_C] = (sha256_func)scanhash_c,
 #ifdef WANT_SSE2_4WAY
@@ -205,12 +205,14 @@ enum sha256_algos opt_algo = ALGO_C;
 #endif
 int nDevs;
 static int opt_g_threads = 2;
-static int opt_device;
-static int total_devices;
-bool gpu_devices[MAX_GPUDEVICES];
+static signed int devices_enabled = 0;
+static bool opt_removedisabled = false;
+int total_devices = 0;
+struct cgpu_info *devices[MAX_DEVICES];
+bool have_opencl = false;
 int gpu_threads;
 static bool forced_n_threads;
-int opt_n_threads;
+int opt_n_threads = -1;
 int mining_threads;
 int num_processors;
 bool use_curses = true;
@@ -485,34 +487,20 @@ static double bench_algo_stage3(
 	struct timeval end;
 	struct timeval start;
 	uint32_t max_nonce = (1<<22);
-	unsigned long hashes_done = 0;
+	uint32_t last_nonce = 0;
 
 	gettimeofday(&start, 0);
-	#if defined(WANT_VIA_PADLOCK)
-
-	// For some reason, the VIA padlock hasher has a different API ...
-	if (ALGO_VIA==algo) {
-		(void)scanhash_via(
-			0,
-			work.data,
-			work.target,
-			max_nonce,
-			&hashes_done,
-			work.blk.nonce
-		);
-	} else
-	#endif
 	{
 		sha256_func func = sha256_funcs[algo];
 		(*func)(
 			0,
 			work.midstate,
-			work.data + 64,
+			work.data,
 			work.hash1,
 			work.hash,
 			work.target,
 			max_nonce,
-			&hashes_done,
+			&last_nonce,
 			work.blk.nonce
 		);
 	}
@@ -525,7 +513,7 @@ static double bench_algo_stage3(
 
 	double rate = -1.0;
-	if (0<hashes_done && 0<elapsed_seconds)
-		rate = hashes_done/elapsed_seconds/1000000.0;
+	if (0<last_nonce && 0<elapsed_seconds)
+		rate = last_nonce/elapsed_seconds/1000000.0;
 	return rate;
 }
 
@@ -736,11 +727,10 @@ static char *set_devices(const char *arg, int *i)
-	if (*i < 0 || *i > 15)
-		return "Invalid GPU device number";
-	total_devices++;
-	gpu_devices[*i] = true;
+	if (i < 0 || i >= (sizeof(devices_enabled) * 8) - 1)
+		return "Invalid device number";
+	devices_enabled |= 1 << i;
 	return NULL;
 }
@@ -1545,10 +1536,10 @@ static struct opt_table opt_config_table[] = {
 	OPT_WITHOUT_ARG("--debug|-D",
			enable_debug, &opt_debug,
			"Enable debug output"),
-#ifdef HAVE_OPENCL
 	OPT_WITH_ARG("--device|-d",
-		     set_devices, NULL, &opt_device,
+		     set_devices, NULL, NULL,
		     "Select device to use, (Use repeat -d for multiple devices, default: all)"),
+#ifdef HAVE_OPENCL
 	OPT_WITHOUT_ARG("--disable-gpu|-G",
			opt_set_bool, &opt_nogpu,
			"Disable GPU mining even if suitable devices exist"),
@@ -1643,6 +1634,9 @@ static struct opt_table opt_config_table[] = {
 	OPT_WITHOUT_ARG("--real-quiet",
			opt_set_bool, &opt_realquiet,
			"Disable all output"),
+	OPT_WITHOUT_ARG("--remove-disabled",
+			opt_set_bool, &opt_removedisabled,
+			"Remove disabled devices entirely, as if they didn't exist"),
 	OPT_WITH_ARG("--retries|-r",
		     opt_set_intval, opt_show_intval, &opt_retries,
		     "Number of times to retry before giving up, if JSON-RPC call fails (-1 means never)"),
@@ -1929,7 +1923,7 @@ err_out:
 
 static inline int dev_from_id(int thr_id)
 {
-	return thr_info[thr_id].cgpu->cpu_gpu;
+	return thr_info[thr_id].cgpu->device_id;
 }
 
 /* Make the change in the recent value adjust dynamically when the difference
@@ -1965,7 +1959,7 @@ static int requests_staged(void)
 static WINDOW *mainwin, *statuswin, *logwin;
 double total_secs = 0.1;
 static char statusline[256];
-static int cpucursor, gpucursor, logstart, logcursor;
+static int devcursor, logstart, logcursor;
 struct cgpu_info gpus[MAX_GPUDEVICES]; /* Maximum number apparently possible */
 struct cgpu_info *cpus;
 
@@ -2001,24 +1995,7 @@ static void tailsprintf(char *f, const char *fmt, ...)
 
 static void get_statline(char *buf, struct cgpu_info *cgpu)
 {
-	sprintf(buf, "%sPU%d ", cgpu->is_gpu ? "G" : "C", cgpu->cpu_gpu);
-#ifdef HAVE_ADL
-	if (cgpu->has_adl) {
-		int gpu = cgpu->cpu_gpu;
-		float gt = gpu_temp(gpu);
-		int gf = gpu_fanspeed(gpu);
-		int gp = gpu_fanpercent(gpu);
-
-		if (gt != -1)
-			tailsprintf(buf, "%.1fC ", gt);
-		if (gf != -1)
-			tailsprintf(buf, "%dRPM ", gf);
-		else if (gp != -1)
-			tailsprintf(buf, "%d%% ", gp);
-		if (gt > -1 || gf > -1 || gp > -1)
-			tailsprintf(buf, "| ");
-	}
-#endif
+	sprintf(buf, "%s%d ", cgpu->api->name, cgpu->device_id);
 	tailsprintf(buf, "(%ds):%.1f (avg):%.1f Mh/s | A:%d R:%d HW:%d U:%.2f/m",
		opt_log_interval,
		cgpu->rolling,
@@ -2027,8 +2004,8 @@ static void get_statline(char *buf, struct cgpu_info *cgpu)
		cgpu->rejected,
		cgpu->hw_errors,
		cgpu->utility);
-	if (cgpu->is_gpu)
-		tailsprintf(buf, " I:%d", cgpu->intensity);
+	if (cgpu->api->get_statline)
+		cgpu->api->get_statline(buf, cgpu);
 }
 
 static void text_print_status(int thr_id)
@@ -2069,8 +2046,8 @@ static void curses_print_status(void)
 	mvwprintw(statuswin, 5, 0, " Block: %s...  Started: %s", current_hash, blocktime);
Started: %s", current_hash, blocktime); mvwhline(statuswin, 6, 0, '-', 80); mvwhline(statuswin, logstart - 1, 0, '-', 80); - mvwprintw(statuswin, gpucursor - 1, 1, "[P]ool management %s[S]ettings [D]isplay options [Q]uit", - gpu_threads ? "[G]PU management " : ""); + mvwprintw(statuswin, devcursor - 1, 1, "[P]ool management %s[S]ettings [D]isplay options [Q]uit", + have_opencl ? "[G]PU management " : ""); /* The window will be updated once we're done with all the devices */ wnoutrefresh(statuswin); } @@ -2084,38 +2061,17 @@ static void adj_width(int var, int *length) static void curses_print_devstatus(int thr_id) { static int awidth = 1, rwidth = 1, hwwidth = 1, uwidth = 1; - - if (thr_id >= 0 && thr_id < gpu_threads) { - int gpu = dev_from_id(thr_id); - struct cgpu_info *cgpu = &gpus[gpu]; + struct cgpu_info *cgpu = thr_info[thr_id].cgpu; + char logline[255]; cgpu->utility = cgpu->accepted / ( total_secs ? total_secs : 1 ) * 60; - mvwprintw(statuswin, gpucursor + gpu, 0, " GPU %d: ", gpu); -#ifdef HAVE_ADL - if (cgpu->has_adl) { - float gt = gpu_temp(gpu); - int gf = gpu_fanspeed(gpu); - int gp = gpu_fanpercent(gpu); - - if (gt != -1) - wprintw(statuswin, "%5.1fC ", gt); - else - wprintw(statuswin, " "); - if (gf != -1) - wprintw(statuswin, "%4dRPM ", gf); - else if (gp != -1) - wprintw(statuswin, "%3d%% ", gp); - else - wprintw(statuswin, " "); - wprintw(statuswin, "| "); - } -#endif + mvwprintw(statuswin, devcursor + cgpu->cgminer_id, 0, " %s %d: ", cgpu->api->name, cgpu->device_id); if (cgpu->status == LIFE_DEAD) wprintw(statuswin, "DEAD "); else if (cgpu->status == LIFE_SICK) wprintw(statuswin, "SICK "); - else if (!gpu_devices[gpu]) + else if (!cgpu->enabled) wprintw(statuswin, "OFF "); else wprintw(statuswin, "%5.1f", cgpu->rolling); @@ -2123,30 +2079,20 @@ static void curses_print_devstatus(int thr_id) adj_width(cgpu->rejected, &rwidth); adj_width(cgpu->hw_errors, &hwwidth); adj_width(cgpu->utility, &uwidth); - wprintw(statuswin, "/%5.1fMh/s | A:%*d R:%*d HW:%*d U:%*.2f/m I:%2d", + wprintw(statuswin, "/%5.1fMh/s | A:%*d R:%*d HW:%*d U:%*.2f/m", cgpu->total_mhashes / total_secs, awidth, cgpu->accepted, rwidth, cgpu->rejected, hwwidth, cgpu->hw_errors, - uwidth + 3, cgpu->utility, - gpus[gpu].intensity); - wclrtoeol(statuswin); - } else if (thr_id >= gpu_threads) { - int cpu = dev_from_id(thr_id); - struct cgpu_info *cgpu = &cpus[cpu]; + uwidth + 3, cgpu->utility); - cgpu->utility = cgpu->accepted / ( total_secs ? total_secs : 1 ) * 60; + if (cgpu->api->get_statline) { + logline[0] = '\0'; + cgpu->api->get_statline(logline, cgpu); + wprintw(statuswin, "%s", logline); + } - adj_width(cgpu->accepted, &awidth); - adj_width(cgpu->rejected, &rwidth); - adj_width(cgpu->utility, &uwidth); - mvwprintw(statuswin, cpucursor + cpu, 0, " CPU %d: %5.2f/%5.2fMh/s | A:%*d R:%*d U:%*.2f/m", - cpu, cgpu->rolling, cgpu->total_mhashes / total_secs, - awidth, cgpu->accepted, - rwidth, cgpu->rejected, - uwidth + 3, cgpu->utility); wclrtoeol(statuswin); - } wnoutrefresh(statuswin); } @@ -2365,14 +2311,14 @@ static bool submit_upstream_work(const struct work *work) applog(LOG_DEBUG, "PROOF OF WORK RESULT: true (yay!!!)"); if (!QUIET) { if (donor(work->pool)) - applog(LOG_NOTICE, "Accepted %s %sPU %d thread %d donate", - hashshow, cgpu->is_gpu? "G" : "C", cgpu->cpu_gpu, thr_id); + applog(LOG_NOTICE, "Accepted %s %s %d thread %d donate", + hashshow, cgpu->api->name, cgpu->device_id, thr_id); else if (total_pools > 1) - applog(LOG_NOTICE, "Accepted %s %sPU %d thread %d pool %d", - hashshow, cgpu->is_gpu? 
"G" : "C", cgpu->cpu_gpu, thr_id, work->pool->pool_no); + applog(LOG_NOTICE, "Accepted %s %s %d thread %d pool %d", + hashshow, cgpu->api->name, cgpu->device_id, thr_id, work->pool->pool_no); else - applog(LOG_NOTICE, "Accepted %s %sPU %d thread %d", - hashshow, cgpu->is_gpu? "G" : "C", cgpu->cpu_gpu, thr_id); + applog(LOG_NOTICE, "Accepted %s %s %d thread %d", + hashshow, cgpu->api->name, cgpu->device_id, thr_id); } if (opt_shares && total_accepted >= opt_shares) { applog(LOG_WARNING, "Successfully mined %d accepted shares as requested and exiting.", opt_shares); @@ -2387,14 +2333,14 @@ static bool submit_upstream_work(const struct work *work) applog(LOG_DEBUG, "PROOF OF WORK RESULT: false (booooo)"); if (!QUIET) { if (donor(work->pool)) - applog(LOG_NOTICE, "Rejected %s %sPU %d thread %d donate", - hashshow, cgpu->is_gpu? "G" : "C", cgpu->cpu_gpu, thr_id); + applog(LOG_NOTICE, "Rejected %s %s %d thread %d donate", + hashshow, cgpu->api->name, cgpu->device_id, thr_id); else if (total_pools > 1) - applog(LOG_NOTICE, "Rejected %s %sPU %d thread %d pool %d", - hashshow, cgpu->is_gpu? "G" : "C", cgpu->cpu_gpu, thr_id, work->pool->pool_no); + applog(LOG_NOTICE, "Rejected %s %s %d thread %d pool %d", + hashshow, cgpu->api->name, cgpu->device_id, thr_id, work->pool->pool_no); else - applog(LOG_NOTICE, "Rejected %s %sPU %d thread %d", - hashshow, cgpu->is_gpu? "G" : "C", cgpu->cpu_gpu, thr_id); + applog(LOG_NOTICE, "Rejected %s %s %d thread %d", + hashshow, cgpu->api->name, cgpu->device_id, thr_id); } } @@ -3228,11 +3174,11 @@ static void write_config(FILE *fcfg) if (schedstop.enable) fprintf(fcfg, ",\n\"stop-time\" : \"%d:%d\"", schedstop.tm.tm_hour, schedstop.tm.tm_min); for(i = 0; i < nDevs; i++) - if (!gpu_devices[i]) + if (!gpus[i].enabled) break; if (i < nDevs) for(i = 0; i < nDevs; i++) - if (gpu_devices[i]) + if (gpus[i].enabled) fprintf(fcfg, ",\n\"device\" : \"%d\"", i); if (strcmp(opt_api_description, PACKAGE_STRING) != 0) fprintf(fcfg, ",\n\"api-description\" : \"%s\"", opt_api_description); @@ -3558,6 +3504,7 @@ retry: #ifdef HAVE_OPENCL void reinit_device(struct cgpu_info *cgpu); +struct device_api opencl_api; static void manage_gpu(void) { @@ -3626,7 +3573,7 @@ retry: if (thr->cgpu != cgpu) continue; get_datestamp(checkin, &thr->last); - wlog("Thread %d: %.1f Mh/s %s ", i, thr->rolling, gpu_devices[gpu] ? "Enabled" : "Disabled"); + wlog("Thread %d: %.1f Mh/s %s ", i, thr->rolling, cgpu->enabled ? 
"Enabled" : "Disabled"); switch (cgpu->status) { default: case LIFE_WELL: @@ -3657,24 +3604,29 @@ retry: else selected = -1; if (!strncasecmp(&input, "e", 1)) { + struct cgpu_info *cgpu; + if (selected) selected = curses_int("Select GPU to enable"); if (selected < 0 || selected >= nDevs) { wlogprint("Invalid selection\n"); goto retry; } - if (gpu_devices[selected]) { + if (gpus[selected].enabled) { wlogprint("Device already enabled\n"); goto retry; } - gpu_devices[selected] = true; - for (i = 0; i < gpu_threads; i++) { + gpus[selected].enabled = true; + for (i = 0; i < mining_threads; ++i) { + thr = &thr_info[i]; + cgpu = thr->cgpu; + if (cgpu->api != &opencl_api) + continue; if (dev_from_id(i) != selected) continue; - thr = &thr_info[i]; - if (thr->cgpu->status != LIFE_WELL) { + if (cgpu->status != LIFE_WELL) { wlogprint("Must restart device before enabling it"); - gpu_devices[selected] = false; + gpus[selected].enabled = false; goto retry; } if (opt_debug) @@ -3690,11 +3642,11 @@ retry: wlogprint("Invalid selection\n"); goto retry; } - if (!gpu_devices[selected]) { + if (!gpus[selected].enabled) { wlogprint("Device already disabled\n"); goto retry; } - gpu_devices[selected] = false; + gpus[selected].enabled = false; goto retry; } else if (!strncasecmp(&input, "i", 1)) { int intensity; @@ -3778,7 +3730,7 @@ static void *input_thread(void *userdata) display_pools(); else if (!strncasecmp(&input, "s", 1)) set_options(); - else if (gpu_threads && !strncasecmp(&input, "g", 1)) + else if (have_opencl && !strncasecmp(&input, "g", 1)) manage_gpu(); if (opt_realquiet) { disable_curses(); @@ -4348,230 +4300,149 @@ bool submit_nonce(struct thr_info *thr, struct work *work, uint32_t nonce) return submit_work_sync(thr, work); } +static inline bool abandon_work(int thr_id, struct work *work, struct timeval *wdiff, uint64_t hashes) +{ + if (wdiff->tv_sec > opt_scantime || + work->blk.nonce >= MAXTHREADS - hashes || + stale_work(work, false)) + return true; + return false; +} + static void *miner_thread(void *userdata) { - struct work *work = make_work(); struct thr_info *mythr = userdata; const int thr_id = mythr->id; - uint32_t max_nonce = 0xffffff, total_hashes = 0; - unsigned long hashes_done = max_nonce; - bool needs_work = true; + struct cgpu_info *cgpu = mythr->cgpu; + struct device_api *api = cgpu->api; + /* Try to cycle approximately 5 times before each log update */ - const unsigned long cycle = opt_log_interval / 5 ? : 1; + const unsigned long def_cycle = opt_log_interval / 5 ? : 1; + unsigned long cycle; + struct timeval tv_start, tv_end, tv_workstart, tv_lastupdate; + struct timeval diff, sdiff, wdiff; + uint32_t max_nonce = api->can_limit_work ? api->can_limit_work(mythr) : 0xffffffff; + uint32_t hashes_done = 0; + uint32_t hashes; + struct work *work = make_work(); unsigned const int request_interval = opt_scantime * 2 / 3 ? : 1; + unsigned const long request_nonce = MAXTHREADS / 3 * 2; bool requested = false; - uint32_t nonce_inc = max_nonce, hash_div = 1; - double hash_divfloat = 1.0; - + uint32_t hash_div = 1; pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); - /* Set worker threads to nice 19 and then preferentially to SCHED_IDLE - * and if that fails, then SCHED_BATCH. 
-	 * error if it fails */
-	setpriority(PRIO_PROCESS, 0, 19);
-	drop_policy();
+	if (api->thread_init && !api->thread_init(mythr))
+		goto out;
 
-	/* Cpu affinity only makes sense if the number of threads is a multiple
-	 * of the number of CPUs */
-	if (!(opt_n_threads % num_processors))
-		affine_to_cpu(thr_id - gpu_threads, dev_from_id(thr_id));
+	if (opt_debug)
+		applog(LOG_DEBUG, "Popping ping in miner thread");
+	tq_pop(mythr->q, NULL); /* Wait for a ping to start */
 
-	/* Invalidate pool so it fails can_roll() test */
-	work->pool = NULL;
+	sdiff.tv_sec = sdiff.tv_usec = 0;
+	gettimeofday(&tv_lastupdate, NULL);
 
 	while (1) {
-		struct timeval tv_workstart, tv_start, tv_end, diff;
-		uint64_t max64;
-		bool rc;
-
-		if (needs_work) {
-			gettimeofday(&tv_workstart, NULL);
-			/* obtain new work from internal workio thread */
-			if (unlikely(!get_work(work, requested, mythr, thr_id, hash_div))) {
-				applog(LOG_ERR, "work retrieval failed, exiting "
-					"mining thread %d", thr_id);
-				goto out;
-			}
-			needs_work = requested = false;
-			total_hashes = 0;
-			max_nonce = work->blk.nonce + hashes_done;
+		work_restart[thr_id].restart = 0;
+		if (api->free_work && likely(work->pool))
+			api->free_work(mythr, work);
+		if (unlikely(!get_work(work, requested, mythr, thr_id, hash_div))) {
+			applog(LOG_ERR, "work retrieval failed, exiting "
+				"mining thread %d", thr_id);
+			break;
 		}
-		hashes_done = 0;
-		gettimeofday(&tv_start, NULL);
-
-		/* scan nonces for a proof-of-work hash */
-		switch (opt_algo) {
-		case ALGO_C:
-			rc = scanhash_c(thr_id, work->midstate, work->data + 64,
-				        work->hash1, work->hash, work->target,
-					max_nonce, &hashes_done,
-					work->blk.nonce);
+		requested = false;
+		cycle = (can_roll(work) && should_roll(work)) ? 1 : def_cycle;
+		gettimeofday(&tv_workstart, NULL);
+		work->blk.nonce = 0;
+		if (api->prepare_work && !api->prepare_work(mythr, work)) {
+			applog(LOG_ERR, "work prepare failed, exiting "
+				"mining thread %d", thr_id);
 			break;
+		}
 
-#ifdef WANT_X8632_SSE2
-		case ALGO_SSE2_32: {
-			unsigned int rc5 =
-			        scanhash_sse2_32(thr_id, work->midstate, work->data + 64,
-						 work->hash1, work->hash,
-						 work->target,
-					         max_nonce, &hashes_done,
-						 work->blk.nonce);
-			rc = (rc5 == -1) ? false : true;
-		}
-			break;
-#endif
+		do {
+			gettimeofday(&tv_start, NULL);
 
-#ifdef WANT_X8664_SSE2
-		case ALGO_SSE2_64: {
-			unsigned int rc5 =
-			        scanhash_sse2_64(thr_id, work->midstate, work->data + 64,
-						 work->hash1, work->hash,
-						 work->target,
-					         max_nonce, &hashes_done,
-						 work->blk.nonce);
-			rc = (rc5 == -1) ? false : true;
+			hashes = api->scanhash(mythr, work, work->blk.nonce + max_nonce);
+			if (unlikely(work_restart[thr_id].restart))
+				break;
+			if (unlikely(!hashes))
+				goto out;
+			hashes_done += hashes;
+
+			gettimeofday(&tv_end, NULL);
+			timeval_subtract(&diff, &tv_end, &tv_start);
+			sdiff.tv_sec += diff.tv_sec;
+			sdiff.tv_usec += diff.tv_usec;
+			if (sdiff.tv_usec > 1000000) {
+				++sdiff.tv_sec;
+				sdiff.tv_usec -= 1000000;
 			}
-			break;
-#endif
 
-#ifdef WANT_X8664_SSE4
-		case ALGO_SSE4_64: {
-			unsigned int rc5 =
-			        scanhash_sse4_64(thr_id, work->midstate, work->data + 64,
-						 work->hash1, work->hash,
-						 work->target,
-					         max_nonce, &hashes_done,
-						 work->blk.nonce);
-			rc = (rc5 == -1) ? false : true;
-		}
-			break;
+			timeval_subtract(&wdiff, &tv_end, &tv_workstart);
+			if (!requested) {
+#if 0
+				if (wdiff.tv_sec > request_interval)
+					hash_div = (MAXTHREADS / total_hashes) ? : 1;
#endif
-
-#ifdef WANT_SSE2_4WAY
-		case ALGO_4WAY: {
-			unsigned int rc4 =
-				ScanHash_4WaySSE2(thr_id, work->midstate, work->data + 64,
-						  work->hash1, work->hash,
-						  work->target,
-						  max_nonce, &hashes_done,
-						  work->blk.nonce);
-			rc = (rc4 == -1) ? false : true;
+				if (wdiff.tv_sec > request_interval || work->blk.nonce > request_nonce) {
+					thread_reportout(mythr);
+					if (unlikely(!queue_request(mythr, false))) {
+						applog(LOG_ERR, "Failed to queue_request in miner_thread %d", thr_id);
+						goto out;
+					}
+					thread_reportin(mythr);
+					requested = true;
+				}
 			}
-			break;
-#endif
-
-#ifdef WANT_ALTIVEC_4WAY
-		case ALGO_ALTIVEC_4WAY:
-		{
-			unsigned int rc4 = ScanHash_altivec_4way(thr_id, work->midstate, work->data + 64,
-								 work->hash1, work->hash,
-								 work->target,
-								 max_nonce, &hashes_done,
-								 work->blk.nonce);
-			rc = (rc4 == -1) ? false : true;
-		}
-			break;
-#endif
-
-#ifdef WANT_VIA_PADLOCK
-		case ALGO_VIA:
-			rc = scanhash_via(thr_id, work->data, work->target,
-					  max_nonce, &hashes_done,
-					  work->blk.nonce);
-			break;
-#endif
-		case ALGO_CRYPTOPP:
-			rc = scanhash_cryptopp(thr_id, work->midstate, work->data + 64,
-				        work->hash1, work->hash, work->target,
-					max_nonce, &hashes_done,
-					work->blk.nonce);
-			break;
-#ifdef WANT_CRYPTOPP_ASM32
-		case ALGO_CRYPTOPP_ASM32:
-			rc = scanhash_asm32(thr_id, work->midstate, work->data + 64,
-				        work->hash1, work->hash, work->target,
-					max_nonce, &hashes_done,
-					work->blk.nonce);
-			break;
-#endif
+			if (sdiff.tv_sec < cycle) {
+				if (likely(!api->can_limit_work || max_nonce == 0xffffffff))
+					continue;
 
-		default:
-			/* should never happen */
-			goto out;
-		}
+				{
+					int mult = 1000000 / ((sdiff.tv_usec + 0x400) / 0x400) + 0x10;
+					mult *= cycle;
+					if (max_nonce > (0xffffffff * 0x400) / mult)
+						max_nonce = 0xffffffff;
+					else
+						max_nonce = (max_nonce * mult) / 0x400;
+				}
+			} else if (unlikely(sdiff.tv_sec > cycle) && api->can_limit_work) {
+				max_nonce = max_nonce * cycle / sdiff.tv_sec;
+			} else if (unlikely(sdiff.tv_usec > 100000) && api->can_limit_work) {
+				max_nonce = max_nonce * 0x400 / (((cycle * 1000000) + sdiff.tv_usec) / (cycle * 1000000 / 0x400));
+			}
 
-		/* record scanhash elapsed time */
-		gettimeofday(&tv_end, NULL);
-		timeval_subtract(&diff, &tv_end, &tv_start);
-
-		hashes_done -= work->blk.nonce;
-		hashmeter(thr_id, &diff, hashes_done);
-		total_hashes += hashes_done;
-		work->blk.nonce += hashes_done;
-
-		/* adjust max_nonce to meet target cycle time */
-		if (diff.tv_usec > 500000)
-			diff.tv_sec++;
-		if (diff.tv_sec && diff.tv_sec != cycle) {
-			uint64_t next_inc = ((uint64_t)hashes_done * (uint64_t)cycle) / (uint64_t)diff.tv_sec;
-
-			if (next_inc > (uint64_t)nonce_inc / 2 * 3)
-				next_inc = nonce_inc / 2 * 3;
-			nonce_inc = next_inc;
-		} else if (!diff.tv_sec)
-			nonce_inc = hashes_done * 2;
-		if (nonce_inc < 4)
-			nonce_inc = 0xffffff;
-		max64 = work->blk.nonce + nonce_inc;
-		if (max64 > 0xfffffffaULL)
-			max64 = 0xfffffffaULL;
-		max_nonce = max64;
-
-		/* if nonce found, submit work */
-		if (unlikely(rc)) {
-			if (opt_debug)
-				applog(LOG_DEBUG, "CPU %d found something?", dev_from_id(thr_id));
-			if (unlikely(!submit_work_sync(mythr, work))) {
-				applog(LOG_ERR, "Failed to submit_work_sync in miner_thread %d", thr_id);
-				break;
+			timeval_subtract(&diff, &tv_end, &tv_lastupdate);
+			if (diff.tv_sec >= opt_log_interval) {
+				hashmeter(thr_id, &diff, hashes_done);
+				hashes_done = 0;
+				tv_lastupdate = tv_end;
 			}
-			work->blk.nonce += 4;
-		}
-
-		timeval_subtract(&diff, &tv_end, &tv_workstart);
-		if (!requested && (diff.tv_sec >= request_interval)) {
-			thread_reportout(mythr);
-			if (unlikely(!queue_request(mythr, false))) {
-				applog(LOG_ERR, "Failed to queue_request in miner_thread %d", thr_id);
-				goto out;
+			if (unlikely(mythr->pause || !cgpu->enabled)) {
+				applog(LOG_WARNING, "Thread %d being disabled", thr_id);
+				mythr->rolling = mythr->cgpu->rolling = 0;
+				if (opt_debug)
+					applog(LOG_DEBUG, "Popping wakeup ping in miner thread");
+				thread_reportout(mythr);
+				tq_pop(mythr->q, NULL); /* Ignore ping that's popped */
+				thread_reportin(mythr);
+				applog(LOG_WARNING, "Thread %d being re-enabled", thr_id);
 			}
-			thread_reportin(mythr);
-			requested = true;
-		}
-		if (diff.tv_sec > opt_scantime) {
-			decay_time(&hash_divfloat , (double)((MAXTHREADS / total_hashes) ? : 1));
-			hash_div = hash_divfloat;
-			needs_work = true;
-		} else if (work_restart[thr_id].restart || stale_work(work, false) ||
-			work->blk.nonce >= MAXTHREADS - hashes_done)
-			needs_work = true;
-
-		if (unlikely(mythr->pause)) {
-			applog(LOG_WARNING, "Thread %d being disabled", thr_id);
-			mythr->rolling = mythr->cgpu->rolling = 0;
-			if (opt_debug)
-				applog(LOG_DEBUG, "Popping wakeup ping in miner thread");
+			sdiff.tv_sec = sdiff.tv_usec = 0;
 
-			thread_reportout(mythr);
-			tq_pop(mythr->q, NULL); /* Ignore ping that's popped */
-			thread_reportin(mythr);
-			applog(LOG_WARNING, "Thread %d being re-enabled", thr_id);
-		}
+			if (can_roll(work) && should_roll(work))
+				roll_work(work);
+		} while (!abandon_work(thr_id, work, &wdiff, hashes));
 	}

out:
+	if (api->thread_shutdown)
+		api->thread_shutdown(mythr);
+
 	thread_reportin(mythr);
 	applog(LOG_ERR, "Thread %d failure, exiting", thr_id);
 	tq_freeze(mythr->q);
@@ -4679,224 +4550,6 @@ static void set_threads_hashes(unsigned int vectors, unsigned int *threads,
 	*globalThreads = *threads;
 	*hashes = *threads * vectors;
 }
-
-static void *gpuminer_thread(void *userdata)
-{
-	cl_int (*queue_kernel_parameters)(_clState *, dev_blk_ctx *);
-
-	const unsigned long cycle = opt_log_interval / 5 ? : 1;
-	struct timeval tv_start, tv_end, diff, tv_workstart;
-	struct thr_info *mythr = userdata;
-	const int thr_id = mythr->id;
-	uint32_t *res, *blank_res;
-	double gpu_ms_average = 7;
-	int gpu = dev_from_id(thr_id);
-
-	size_t globalThreads[1];
-	size_t localThreads[1];
-
-	cl_int status;
-
-	_clState *clState = clStates[thr_id];
-	const cl_kernel *kernel = &clState->kernel;
-
-	struct work *work = make_work();
-	unsigned int threads;
-	unsigned const int vectors = clState->preferred_vwidth;
-	unsigned int hashes;
-	unsigned int hashes_done = 0;
-
-	/* Request the next work item at 2/3 of the scantime */
-	unsigned const int request_interval = opt_scantime * 2 / 3 ? : 1;
-	unsigned const long request_nonce = MAXTHREADS / 3 * 2;
-	bool requested = false;
-	uint32_t total_hashes = 0, hash_div = 1;
-
-	switch (chosen_kernel) {
-		case KL_POCLBM:
-			queue_kernel_parameters = &queue_poclbm_kernel;
-			break;
-		case KL_PHATK:
-		default:
-			queue_kernel_parameters = &queue_phatk_kernel;
-			break;
-	}
-
-	pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
-
-	res = calloc(BUFFERSIZE, 1);
-	blank_res = calloc(BUFFERSIZE, 1);
-
-	if (!res || !blank_res) {
-		applog(LOG_ERR, "Failed to calloc in gpuminer_thread");
-		goto out;
-	}
-
-	gettimeofday(&tv_start, NULL);
-	localThreads[0] = clState->work_size;
-	set_threads_hashes(vectors, &threads, &hashes, &globalThreads[0],
-			   localThreads[0], gpus[gpu].intensity);
-
-	diff.tv_sec = 0;
-	gettimeofday(&tv_end, NULL);
-
-	work->pool = NULL;
-
-	status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
-			BUFFERSIZE, blank_res, 0, NULL, NULL);
-	if (unlikely(status != CL_SUCCESS))
-		{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
-
-	mythr->cgpu->status = LIFE_WELL;
-	if (opt_debug)
-		applog(LOG_DEBUG, "Popping ping in gpuminer thread");
-
-	tq_pop(mythr->q, NULL); /* Wait for a ping to start */
-	gettimeofday(&tv_workstart, NULL);
-	/* obtain new work from internal workio thread */
-	if (unlikely(!get_work(work, requested, mythr, thr_id, hash_div))) {
-		applog(LOG_ERR, "work retrieval failed, exiting "
-			"gpu mining thread %d", thr_id);
-		goto out;
-	}
-	requested = false;
-	precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
-	work->blk.nonce = 0;
-
-	while (1) {
-		struct timeval tv_gpustart, tv_gpuend;
-		suseconds_t gpu_us;
-
-		gettimeofday(&tv_gpustart, NULL);
-		timeval_subtract(&diff, &tv_gpustart, &tv_gpuend);
-		/* This finish flushes the readbuffer set with CL_FALSE later */
-		clFinish(clState->commandQueue);
-		gettimeofday(&tv_gpuend, NULL);
-		timeval_subtract(&diff, &tv_gpuend, &tv_gpustart);
-		gpu_us = diff.tv_sec * 1000000 + diff.tv_usec;
-		decay_time(&gpu_ms_average, gpu_us / 1000);
-		if (gpus[gpu].dynamic) {
-			/* Try to not let the GPU be out for longer than 6ms, but
-			 * increase intensity when the system is idle, unless
-			 * dynamic is disabled. */
-			if (gpu_ms_average > 7) {
-				if (gpus[gpu].intensity > -10)
-					gpus[gpu].intensity--;
-			} else if (gpu_ms_average < 3) {
-				if (gpus[gpu].intensity < 10)
-					gpus[gpu].intensity++;
-			}
-		}
-		set_threads_hashes(vectors, &threads, &hashes, globalThreads,
-				   localThreads[0], gpus[gpu].intensity);
-
-		if (diff.tv_sec > opt_scantime ||
-		    work->blk.nonce >= MAXTHREADS - hashes ||
-		    work_restart[thr_id].restart ||
-		    stale_work(work, false)) {
-			/* Ignore any reads since we're getting new work and queue a clean buffer */
-			status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
-					BUFFERSIZE, blank_res, 0, NULL, NULL);
-			if (unlikely(status != CL_SUCCESS))
-				{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
-			memset(res, 0, BUFFERSIZE);
-
-			gettimeofday(&tv_workstart, NULL);
-			if (opt_debug)
-				applog(LOG_DEBUG, "getwork thread %d", thr_id);
-			/* obtain new work from internal workio thread */
-			if (unlikely(!get_work(work, requested, mythr, thr_id, hash_div))) {
-				applog(LOG_ERR, "work retrieval failed, exiting "
-					"gpu mining thread %d", thr_id);
-				goto out;
-			}
-			requested = false;
-
-			precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64));
-			work_restart[thr_id].restart = 0;
-
-			/* Flushes the writebuffer set with CL_FALSE above */
-			clFinish(clState->commandQueue);
-		}
-		status = queue_kernel_parameters(clState, &work->blk);
-		if (unlikely(status != CL_SUCCESS))
-			{ applog(LOG_ERR, "Error: clSetKernelArg of all params failed."); goto out; }
-
-		/* MAXBUFFERS entry is used as a flag to say nonces exist */
-		if (res[FOUND]) {
-			/* Clear the buffer again */
-			status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
-					BUFFERSIZE, blank_res, 0, NULL, NULL);
-			if (unlikely(status != CL_SUCCESS))
-				{ applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); goto out; }
-			if (opt_debug)
-				applog(LOG_DEBUG, "GPU %d found something?", gpu);
-			postcalc_hash_async(mythr, work, res);
-			memset(res, 0, BUFFERSIZE);
-			clFinish(clState->commandQueue);
-		}
-
-		status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
-				globalThreads, localThreads, 0,  NULL, NULL);
-		if (unlikely(status != CL_SUCCESS))
-			{ applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)"); goto out; }
-
-		status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
-				BUFFERSIZE, res, 0, NULL, NULL);
-		if (unlikely(status != CL_SUCCESS))
-			{ applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)"); goto out;}
-
-		gettimeofday(&tv_end, NULL);
-		timeval_subtract(&diff, &tv_end, &tv_start);
-		hashes_done += hashes;
-		total_hashes += hashes;
-		work->blk.nonce += hashes;
-		if (diff.tv_sec >= cycle) {
-			hashmeter(thr_id, &diff, hashes_done);
-			gettimeofday(&tv_start, NULL);
-			hashes_done = 0;
-		}
-
-		timeval_subtract(&diff, &tv_end, &tv_workstart);
-		if (!requested) {
-#if 0
-			if (diff.tv_sec > request_interval)
-				hash_div = (MAXTHREADS / total_hashes) ? : 1;
-#endif
-			if (diff.tv_sec > request_interval || work->blk.nonce > request_nonce) {
-				thread_reportout(mythr);
-				if (unlikely(!queue_request(mythr, false))) {
-					applog(LOG_ERR, "Failed to queue_request in gpuminer_thread %d", thr_id);
-					goto out;
-				}
-				thread_reportin(mythr);
-				requested = true;
-			}
-		}
-		if (unlikely(!gpu_devices[gpu] || mythr->pause)) {
-			applog(LOG_WARNING, "Thread %d being disabled", thr_id);
-			mythr->rolling = mythr->cgpu->rolling = 0;
-			if (opt_debug)
-				applog(LOG_DEBUG, "Popping wakeup ping in gpuminer thread");
-
-			thread_reportout(mythr);
-			tq_pop(mythr->q, NULL); /* Ignore ping that's popped */
-			thread_reportin(mythr);
-			applog(LOG_WARNING, "Thread %d being re-enabled", thr_id);
-		}
-	}
-out:
-	clReleaseCommandQueue(clState->commandQueue);
-	clReleaseKernel(clState->kernel);
-	clReleaseProgram(clState->program);
-	clReleaseContext(clState->context);
-
-	thread_reportin(mythr);
-	applog(LOG_ERR, "Thread %d failure, exiting", thr_id);
-	tq_freeze(mythr->q);
-
-	return NULL;
-}
 #endif /* HAVE_OPENCL */
 
 /* Stage another work item from the work returned in a longpoll */
@@ -5068,7 +4721,7 @@ static void *reinit_cpu(void *userdata)
 	pthread_detach(pthread_self());
#if 0
 	struct cgpu_info *cgpu = (struct cgpu_info *)userdata;
-	int cpu = cgpu->cpu_gpu;
+	int cpu = cgpu->device_id;
 	long thr_id = ....(long)userdata;
 	struct thr_info *thr = &thr_info[thr_id];
 	int cpu = dev_from_id(thr_id);
@@ -5124,10 +4777,14 @@ select_cgpu:
 		goto out;
 	}
 
-	gpu = cgpu->cpu_gpu;
-	gpu_devices[gpu] = false;
+	gpu = cgpu->device_id;
+	cgpu->enabled = false;
 
-	for (thr_id = 0; thr_id < gpu_threads; thr_id ++) {
+	for (thr_id = 0; thr_id < mining_threads; ++thr_id) {
+		thr = &thr_info[thr_id];
+		cgpu = thr->cgpu;
+		if (cgpu->api != &opencl_api)
+			continue;
 		if (dev_from_id(thr_id) != gpu)
 			continue;
 
@@ -5146,14 +4803,16 @@ select_cgpu:
 		applog(LOG_WARNING, "Thread %d no longer exists", thr_id);
 	}
 
-	gpu_devices[gpu] = true;
+	cgpu->enabled = true;
 
-	for (thr_id = 0; thr_id < gpu_threads; thr_id ++) {
+	for (thr_id = 0; thr_id < mining_threads; ++thr_id) {
+		thr = &thr_info[thr_id];
+		cgpu = thr->cgpu;
+		if (cgpu->api != &opencl_api)
+			continue;
 		if (dev_from_id(thr_id) != gpu)
 			continue;
 
-		thr = &thr_info[thr_id];
-
 		/* Lose this ram cause we may get stuck here! */
 		//tq_freeze(thr->q);
@@ -5172,7 +4831,7 @@ select_cgpu:
 	}
 	applog(LOG_INFO, "initCl() finished.  Found %s", name);
Found %s", name); - if (unlikely(thr_info_create(thr, NULL, gpuminer_thread, thr))) { + if (unlikely(thr_info_create(thr, NULL, miner_thread, thr))) { applog(LOG_ERR, "thread %d create failed", thr_id); return NULL; } @@ -5182,11 +4841,14 @@ select_cgpu: gettimeofday(&now, NULL); get_datestamp(cgpu->init, &now); - for (thr_id = 0; thr_id < gpu_threads; thr_id ++) { + for (thr_id = 0; thr_id < mining_threads; ++thr_id) { + thr = &thr_info[thr_id]; + cgpu = thr->cgpu; + if (cgpu->api != &opencl_api) + continue; if (dev_from_id(thr_id) != gpu) continue; - thr = &thr_info[thr_id]; tq_push(thr->q, &ping); } @@ -5202,24 +4864,16 @@ static void *reinit_gpu(void *userdata) void reinit_device(struct cgpu_info *cgpu) { - if (cgpu->is_gpu) - tq_push(thr_info[gpur_thr_id].q, cgpu); - else - tq_push(thr_info[cpur_thr_id].q, cgpu); + if (cgpu->api->reinit_device) + cgpu->api->reinit_device(cgpu); } /* Determine which are the first threads belonging to a device and if they're * active */ static bool active_device(int thr_id) { - if (thr_id < gpu_threads) { - if (thr_id >= total_devices) - return false; - if (!gpu_devices[dev_from_id(thr_id)]) - return false; - } else if (thr_id > gpu_threads + num_processors) - return false; - return true; + struct cgpu_info *cgpu = thr_info[thr_id].cgpu; + return cgpu->enabled; } /* Makes sure the hashmeter keeps going even if mining threads stall, updates @@ -5314,25 +4968,30 @@ static void *watchdog_thread(void *userdata) struct thr_info *thr; thr = &thr_info[i]; - /* Don't touch disabled GPUs */ - if (thr->cgpu->is_gpu && !gpu_devices[thr->cgpu->cpu_gpu]) + /* Don't touch disabled devices */ + if (!thr->cgpu->enabled) continue; thr->pause = false; tq_push(thr->q, &ping); } } - for (i = 0; i < gpu_threads; i++) { +#ifdef HAVE_OPENCL + for (i = 0; i < mining_threads; i++) { struct thr_info *thr; bool *enable; + struct cgpu_info *cgpu; int gpu; + thr = &thr_info[i]; + cgpu = thr->cgpu; + if (cgpu->api != &opencl_api) + continue; /* Use only one thread per device to determine if the GPU is healthy */ if (i >= nDevs) break; - thr = &thr_info[i]; - gpu = thr->cgpu->cpu_gpu; - enable = &gpu_devices[gpu]; + gpu = thr->cgpu->device_id; + enable = &cgpu->enabled; #ifdef HAVE_ADL if (adl_active && gpus[gpu].has_adl && *enable) gpu_autotune(gpu, enable); @@ -5384,6 +5043,7 @@ static void *watchdog_thread(void *userdata) reinit_device(thr->cgpu); } } +#endif } return NULL; @@ -5687,29 +5347,494 @@ static void enable_curses(void) { unlock_curses(); } -int main (int argc, char *argv[]) -{ - unsigned int i, pools_active = 0; - struct block *block, *tmpblock; - struct work *work, *tmpwork; - struct sigaction handler; - struct thr_info *thr; - char name[256]; - /* This dangerous functions tramples random dynamically allocated - * variables so do it before anything at all */ - if (unlikely(curl_global_init(CURL_GLOBAL_ALL))) - quit(1, "Failed to curl_global_init"); +struct device_api cpu_api; - if (unlikely(pthread_mutex_init(&hash_lock, NULL))) - quit(1, "Failed to pthread_mutex_init"); - if (unlikely(pthread_mutex_init(&qd_lock, NULL))) - quit(1, "Failed to pthread_mutex_init"); - if (unlikely(pthread_mutex_init(&curses_lock, NULL))) - quit(1, "Failed to pthread_mutex_init"); - if (unlikely(pthread_mutex_init(&control_lock, NULL))) - quit(1, "Failed to pthread_mutex_init"); - if (unlikely(pthread_rwlock_init(&blk_lock, NULL))) +static void cpu_detect() +{ + int i; + + // Reckon number of cores in the box + #if defined(WIN32) + { + DWORD system_am; + DWORD process_am; + BOOL ok = 
+			GetCurrentProcess(),
+			&system_am,
+			&process_am
+		);
+		if (!ok) {
+			applog(LOG_ERR, "couldn't figure out number of processors :(");
+			num_processors = 1;
+		} else {
+			size_t n = 32;
+			num_processors = 0;
+			while (n--)
+				if (process_am & (1<<n))
+					num_processors++;
+		}
+	}
+	#else
+	num_processors = sysconf(_SC_NPROCESSORS_ONLN);
+	#endif /* !WIN32 */
+
+	if (opt_n_threads < 0 || !forced_n_threads) {
+		if (total_devices)
+			opt_n_threads = 0;
+		else
+			opt_n_threads = num_processors;
+	}
+	if (!opt_n_threads)
+		return;
+
+	if (opt_n_threads + total_devices > MAX_DEVICES)
+		opt_n_threads = MAX_DEVICES - total_devices;
+	cpus = calloc(opt_n_threads, sizeof(struct cgpu_info));
+	if (unlikely(!cpus))
+		quit(1, "Failed to calloc cpus");
+	for (i = 0; i < opt_n_threads; ++i) {
+		struct cgpu_info *cgpu;
+
+		cgpu = devices[total_devices + i] = &cpus[i];
+		cgpu->api = &cpu_api;
+		cgpu->enabled = true;
+		cgpu->device_id = i;
+		cgpu->threads = 1;
+	}
+	total_devices += opt_n_threads;
+}
+
+static void reinit_cpu_device(struct cgpu_info *cpu)
+{
+	tq_push(thr_info[cpur_thr_id].q, cpu);
+}
+
+static bool cpu_thread_prepare(struct thr_info *thr)
+{
+	thread_reportin(thr);
+
+	return true;
+}
+
+static uint64_t cpu_can_limit_work(struct thr_info *thr)
+{
+	return 0xfffff;
+}
+
+static bool cpu_thread_init(struct thr_info *thr)
+{
+	const int thr_id = thr->id;
+
+	/* Set worker threads to nice 19 and then preferentially to SCHED_IDLE
+	 * and if that fails, then SCHED_BATCH. No need for this to be an
+	 * error if it fails */
+	setpriority(PRIO_PROCESS, 0, 19);
+	drop_policy();
+	/* Cpu affinity only makes sense if the number of threads is a multiple
+	 * of the number of CPUs */
+	if (!(opt_n_threads % num_processors))
+		affine_to_cpu(dev_from_id(thr_id), dev_from_id(thr_id) % num_processors);
+	return true;
+}
+
+static uint64_t cpu_scanhash(struct thr_info *thr, struct work *work, uint64_t max_nonce)
+{
+	const int thr_id = thr->id;
+
+	uint32_t first_nonce = work->blk.nonce;
+	uint32_t last_nonce;
+	bool rc;
+
+CPUSearch:
+	last_nonce = first_nonce;
+	rc = false;
+
+	/* scan nonces for a proof-of-work hash */
+	{
+		sha256_func func = sha256_funcs[opt_algo];
+		rc = (*func)(
+			thr_id,
+			work->midstate,
+			work->data,
+			work->hash1,
+			work->hash,
+			work->target,
+			max_nonce,
+			&last_nonce,
+			work->blk.nonce
+		);
+	}
+
+	/* if nonce found, submit work */
+	if (unlikely(rc)) {
+		if (opt_debug)
+			applog(LOG_DEBUG, "CPU %d found something?", dev_from_id(thr_id));
+		if (unlikely(!submit_work_sync(thr, work))) {
+			applog(LOG_ERR, "Failed to submit_work_sync in miner_thread %d", thr_id);
+		}
+		work->blk.nonce = last_nonce + 1;
+		goto CPUSearch;
+	}
+	else
+	if (unlikely(last_nonce == first_nonce))
+		return 0;
+
+	work->blk.nonce = last_nonce + 1;
+	return last_nonce - first_nonce + 1;
+}
+
+struct device_api cpu_api = {
+	.name = "CPU",
+	.api_detect = cpu_detect,
+	.reinit_device = reinit_cpu_device,
+	.thread_prepare = cpu_thread_prepare,
+	.can_limit_work = cpu_can_limit_work,
+	.thread_init = cpu_thread_init,
+	.scanhash = cpu_scanhash,
+};
+
+
+#ifdef HAVE_OPENCL
+struct device_api opencl_api;
+
+static void opencl_detect()
+{
+	int i;
+
+	nDevs = clDevicesNum();
+	if (nDevs < 0) {
+		applog(LOG_ERR, "clDevicesNum returned error, none usable");
+		nDevs = 0;
+	}
+
+	if (MAX_DEVICES - total_devices < nDevs)
+		nDevs = MAX_DEVICES - total_devices;
+
+	if (!nDevs) {
+		return;
+	}
+
+	if (opt_kernel) {
+		if (strcmp(opt_kernel, "poclbm") && strcmp(opt_kernel, "phatk"))
+			quit(1, "Invalid kernel name specified - must be poclbm or phatk");
+		if (!strcmp(opt_kernel, "poclbm"))
+			chosen_kernel = KL_POCLBM;
+		else
+			chosen_kernel = KL_PHATK;
+	} else
+		chosen_kernel = KL_NONE;
+
+	for (i = 0; i < nDevs; ++i) {
+		struct cgpu_info *cgpu;
+
+		cgpu = devices[total_devices++] = &gpus[i];
+		cgpu->enabled = true;
+		cgpu->api = &opencl_api;
+		cgpu->device_id = i;
+		cgpu->threads = opt_g_threads;
+	}
+}
+
+static void reinit_opencl_device(struct cgpu_info *gpu)
+{
+	tq_push(thr_info[gpur_thr_id].q, gpu);
+}
+
+static void get_opencl_statline(char *buf, struct cgpu_info *gpu)
+{
+	tailsprintf(buf, " | I:%2d", gpu->intensity);
+#ifdef HAVE_ADL
+	if (gpu->has_adl) {
+		int gpuid = gpu->device_id;
+		float gt = gpu_temp(gpuid);
+		int gf = gpu_fanspeed(gpuid);
+		int gp;
+
+		if (gt != -1)
+			tailsprintf(buf, " %5.1fC ", gt);
+		else
+			tailsprintf(buf, "        ", gt);
+		if (gf != -1)
+			tailsprintf(buf, " %4dRPM", gf);
+		else if ((gp = gpu_fanpercent(gpuid)) != -1)
+			tailsprintf(buf, " %3d%%", gp);
+	}
+#endif
+}
+
+struct opencl_thread_data {
+	cl_int (*queue_kernel_parameters)(_clState *, dev_blk_ctx *);
+	uint32_t *res;
+	struct work *last_work;
+	struct work _last_work;
+};
+
+static uint32_t *blank_res;
+
+static bool opencl_thread_prepare(struct thr_info *thr)
+{
+	char name[256];
+	struct timeval now;
+	struct cgpu_info *cgpu = thr->cgpu;
+	int gpu = cgpu->device_id;
+	int i = thr->id;
+	static bool failmessage = false;
+
+	if (!blank_res)
+		blank_res = calloc(BUFFERSIZE, 1);
+	if (!blank_res) {
+		applog(LOG_ERR, "Failed to calloc in opencl_thread_init");
+		return false;
+	}
+
+	applog(LOG_INFO, "Init GPU thread %i", i);
+	clStates[i] = initCl(gpu, name, sizeof(name));
+	if (!clStates[i]) {
+		enable_curses();
+		applog(LOG_ERR, "Failed to init GPU thread %d, disabling device %d", i, gpu);
+		if (!failmessage) {
+			char *buf;
+
+			applog(LOG_ERR, "Restarting the GPU from the menu is unlikely to fix this.");
+			applog(LOG_ERR, "Try stopping other applications using the GPU like afterburner.");
+			applog(LOG_ERR, "Then restart cgminer.");
+			failmessage = true;
+			buf = curses_input("Press enter to continue");
+			if (buf)
+				free(buf);
+		}
+		cgpu->enabled = false;
+		cgpu->status = LIFE_NOSTART;
+		return false;
+	}
+	applog(LOG_INFO, "initCl() finished.  Found %s", name);
Found %s", name); + gettimeofday(&now, NULL); + get_datestamp(cgpu->init, &now); + + have_opencl = true; + + return true; +} + +static bool opencl_thread_init(struct thr_info *thr) +{ + const int thr_id = thr->id; + struct cgpu_info *gpu = thr->cgpu; + + struct opencl_thread_data *thrdata; + thrdata = calloc(1, sizeof(*thrdata)); + thr->cgpu_data = thrdata; + + if (!thrdata) { + applog(LOG_ERR, "Failed to calloc in opencl_thread_init"); + return false; + } + + switch (chosen_kernel) { + case KL_POCLBM: + thrdata->queue_kernel_parameters = &queue_poclbm_kernel; + break; + case KL_PHATK: + default: + thrdata->queue_kernel_parameters = &queue_phatk_kernel; + break; + } + + thrdata->res = calloc(BUFFERSIZE, 1); + + if (!thrdata->res) { + free(thrdata); + applog(LOG_ERR, "Failed to calloc in opencl_thread_init"); + return false; + } + + _clState *clState = clStates[thr_id]; + cl_int status; + + status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, + BUFFERSIZE, blank_res, 0, NULL, NULL); + if (unlikely(status != CL_SUCCESS)) { + applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed."); + return false; + } + + gpu->status = LIFE_WELL; + + return true; +} + +static void opencl_free_work(struct thr_info *thr, struct work *work) +{ + const int thr_id = thr->id; + struct opencl_thread_data *thrdata = thr->cgpu_data; + _clState *clState = clStates[thr_id]; + + clFinish(clState->commandQueue); + if (thrdata->res[FOUND]) { + thrdata->last_work = &thrdata->_last_work; + memcpy(thrdata->last_work, work, sizeof(*thrdata->last_work)); + } +} + +static bool opencl_prepare_work(struct thr_info *thr, struct work *work) +{ + precalc_hash(&work->blk, (uint32_t *)(work->midstate), (uint32_t *)(work->data + 64)); + return true; +} + +static uint64_t opencl_scanhash(struct thr_info *thr, struct work *work, uint64_t max_nonce) +{ + const int thr_id = thr->id; + struct opencl_thread_data *thrdata = thr->cgpu_data; + struct cgpu_info *gpu = thr->cgpu; + _clState *clState = clStates[thr_id]; + const cl_kernel *kernel = &clState->kernel; + + double gpu_ms_average = 7; + cl_int status; + + size_t globalThreads[1]; + size_t localThreads[1] = { clState->work_size }; + unsigned int threads; + unsigned int hashes; + + + struct timeval tv_gpustart, tv_gpuend, diff; + suseconds_t gpu_us; + + gettimeofday(&tv_gpustart, NULL); + timeval_subtract(&diff, &tv_gpustart, &tv_gpuend); + /* This finish flushes the readbuffer set with CL_FALSE later */ + clFinish(clState->commandQueue); + gettimeofday(&tv_gpuend, NULL); + timeval_subtract(&diff, &tv_gpuend, &tv_gpustart); + gpu_us = diff.tv_sec * 1000000 + diff.tv_usec; + decay_time(&gpu_ms_average, gpu_us / 1000); + if (gpu->dynamic) { + /* Try to not let the GPU be out for longer than 6ms, but + * increase intensity when the system is idle, unless + * dynamic is disabled. 
+		if (gpu_ms_average > 7) {
+			if (gpu->intensity > -10)
+				--gpu->intensity;
+		} else if (gpu_ms_average < 3) {
+			if (gpu->intensity < 10)
+				++gpu->intensity;
+		}
+	}
+	set_threads_hashes(clState->preferred_vwidth, &threads, &hashes, globalThreads,
+			   localThreads[0], gpu->intensity);
+
+	status = thrdata->queue_kernel_parameters(clState, &work->blk);
+	if (unlikely(status != CL_SUCCESS)) {
+		applog(LOG_ERR, "Error: clSetKernelArg of all params failed.");
+		return 0;
+	}
+
+	/* MAXBUFFERS entry is used as a flag to say nonces exist */
+	if (thrdata->res[FOUND]) {
+		/* Clear the buffer again */
+		status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+				BUFFERSIZE, blank_res, 0, NULL, NULL);
+		if (unlikely(status != CL_SUCCESS)) {
+			applog(LOG_ERR, "Error: clEnqueueWriteBuffer failed.");
+			return 0;
+		}
+		if (unlikely(thrdata->last_work)) {
+			if (opt_debug)
+				applog(LOG_DEBUG, "GPU %d found something in last work?", gpu->device_id);
+			postcalc_hash_async(thr, thrdata->last_work, thrdata->res);
+			thrdata->last_work = NULL;
+		} else {
+			if (opt_debug)
+				applog(LOG_DEBUG, "GPU %d found something?", gpu->device_id);
+			postcalc_hash_async(thr, work, thrdata->res);
+		}
+		memset(thrdata->res, 0, BUFFERSIZE);
+		clFinish(clState->commandQueue);
+	}
+	status = clEnqueueNDRangeKernel(clState->commandQueue, *kernel, 1, NULL,
+			globalThreads, localThreads, 0,  NULL, NULL);
+	if (unlikely(status != CL_SUCCESS)) {
+		applog(LOG_ERR, "Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)");
+		return 0;
+	}
+
+	status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0,
+			BUFFERSIZE, thrdata->res, 0, NULL, NULL);
+	if (unlikely(status != CL_SUCCESS)) {
+		applog(LOG_ERR, "Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)");
+		return 0;
+	}
+
+	work->blk.nonce += hashes;
+
+	return hashes;
+}
+
+static void opencl_thread_shutdown(struct thr_info *thr)
+{
+	const int thr_id = thr->id;
+	_clState *clState = clStates[thr_id];
+
+	clReleaseCommandQueue(clState->commandQueue);
+	clReleaseKernel(clState->kernel);
+	clReleaseProgram(clState->program);
+	clReleaseContext(clState->context);
+}
+
+struct device_api opencl_api = {
+	.name = "GPU",
+	.api_detect = opencl_detect,
+	.reinit_device = reinit_opencl_device,
+	.get_statline = get_opencl_statline,
+	.thread_prepare = opencl_thread_prepare,
+	.thread_init = opencl_thread_init,
+	.free_work = opencl_free_work,
+	.prepare_work = opencl_prepare_work,
+	.scanhash = opencl_scanhash,
+	.thread_shutdown = opencl_thread_shutdown,
+};
+#endif
+
+
+static int cgminer_id_count = 0;
+
+void enable_device(struct cgpu_info *cgpu)
+{
+	cgpu->enabled = true;
+	devices[cgpu->cgminer_id = cgminer_id_count++] = cgpu;
+	mining_threads += cgpu->threads;
+#ifdef HAVE_OPENCL
+	if (cgpu->api == &opencl_api) {
+		gpu_threads += cgpu->threads;
+	}
+#endif
+}
+
+int main (int argc, char *argv[])
+{
+	unsigned int i, pools_active = 0;
+	unsigned int j, k;
+	struct block *block, *tmpblock;
+	struct work *work, *tmpwork;
+	struct sigaction handler;
+	struct thr_info *thr;
+
+	/* This dangerous function tramples random dynamically allocated
+	 * variables so do it before anything at all */
+	if (unlikely(curl_global_init(CURL_GLOBAL_ALL)))
+		quit(1, "Failed to curl_global_init");
+
+	if (unlikely(pthread_mutex_init(&hash_lock, NULL)))
+		quit(1, "Failed to pthread_mutex_init");
+	if (unlikely(pthread_mutex_init(&qd_lock, NULL)))
+		quit(1, "Failed to pthread_mutex_init");
+	if (unlikely(pthread_mutex_init(&curses_lock, NULL)))
+		quit(1, "Failed to pthread_mutex_init");
+	if (unlikely(pthread_mutex_init(&control_lock, NULL)))
+		quit(1, "Failed to pthread_mutex_init");
+	if (unlikely(pthread_rwlock_init(&blk_lock, NULL)))
 		quit(1, "Failed to pthread_rwlock_init");
 
 	sprintf(packagename, "%s %s", PACKAGE, VERSION);
@@ -5744,48 +5869,11 @@ int main (int argc, char *argv[])
 	HASH_ADD_STR(blocks, hash, block);
 	strcpy(current_block, block->hash);
 
-	// Reckon number of cores in the box
-	#if defined(WIN32)
-
-	DWORD system_am;
-	DWORD process_am;
-	BOOL ok = GetProcessAffinityMask(
-		GetCurrentProcess(),
-		&system_am,
-		&process_am
-	);
-	if (!ok) {
-		applog(LOG_ERR, "couldn't figure out number of processors :(");
-		num_processors = 1;
-	} else {
-		size_t n = 32;
-		num_processors = 0;
-		while (n--)
-			if (process_am & (1<<n))
-				num_processors++;
-	}
-	#else
-	num_processors = sysconf(_SC_NPROCESSORS_ONLN);
-	#endif /* !WIN32 */
-
 #ifdef HAVE_OPENCL
-	nDevs = clDevicesNum();
-	if (nDevs < 0) {
-		applog(LOG_ERR, "clDevicesNum returned error, none usable");
-		nDevs = 0;
-	}
-
-	if (total_devices) {
-		if (total_devices > nDevs)
-			quit(1, "More devices specified than exist");
-		for (i = 0; i < MAX_GPUDEVICES; i++)
-			if (gpu_devices[i] && i + 1 > nDevs)
-				quit (1, "Command line options set a device that doesn't exist");
-#ifdef HAVE_ADL
-		for (i = 0; i < nDevs; i++) {
-			/* Make sure we do not attempt to adl manage devices
-			 * that we disable */
-			if (!gpu_devices[i])
-				gpus[i].gpu_engine =
-				gpus[i].gpu_memclock =
-				gpus[i].gpu_vddc =
-				gpus[i].gpu_fan =
-				gpus[i].gpu_powertune = 0;
-		}
-#endif
-	} else {
-		for (i = 0; i < nDevs; i++)
-			gpu_devices[i] = true;
-		total_devices = nDevs;
-	}
-#else
-	gpu_threads = 0;
+	if (!opt_nogpu)
+		opencl_api.api_detect();
 #endif
 
-	if (!gpu_threads && !forced_n_threads) {
-		/* Maybe they turned GPU off; restore default CPU threads. */
-		opt_n_threads = num_processors;
-	}
+	cpu_api.api_detect();
+
+	if (devices_enabled == -1) {
+		applog(LOG_ERR, "Devices detected:");
+		for (i = 0; i < total_devices; ++i) {
+			applog(LOG_ERR, " %2d. %s%d", i, devices[i]->api->name, devices[i]->device_id);
%s%d", i, devices[i]->api->name, devices[i]->device_id); + } + quit(0, "%d devices listed", total_devices); + } + + mining_threads = 0; + gpu_threads = 0; + if (devices_enabled) { + for (i = 0; i < (sizeof(devices_enabled) * 8) - 1; ++i) { + if (devices_enabled & (1 << i)) { + if (i >= total_devices) + quit (1, "Command line options set a device that doesn't exist"); + enable_device(devices[i]); + } else if (i < total_devices) { + if (opt_removedisabled) { + if (devices[i]->api == &cpu_api) + --opt_n_threads; + } else { + enable_device(devices[i]); + } + devices[i]->enabled = false; + } + } + total_devices = cgminer_id_count; + } else { + for (i = 0; i < total_devices; ++i) + enable_device(devices[i]); } - if (!opt_n_threads && ! gpu_threads) + if (!total_devices) quit(1, "All devices disabled, cannot mine!"); - logcursor = 8; - gpucursor = logcursor; - cpucursor = gpucursor + nDevs; - logstart = cpucursor + 1; - if (opt_n_threads) { - if (opt_n_threads < num_processors) - logstart += opt_n_threads; - else - logstart += num_processors; - } + devcursor = 8; + logstart = devcursor + total_devices + 1; logcursor = logstart + 1; if (opt_realquiet) @@ -5953,8 +6022,6 @@ int main (int argc, char *argv[]) fork_monitor(); #endif // defined(unix) - mining_threads = opt_n_threads + gpu_threads; - total_threads = mining_threads + 8; work_restart = calloc(total_threads, sizeof(*work_restart)); if (!work_restart) @@ -5984,12 +6051,6 @@ int main (int argc, char *argv[]) if (!thr->q) quit(1, "Failed to tq_new"); - if (opt_n_threads ) { - cpus = calloc(num_processors, sizeof(struct cgpu_info)); - if (unlikely(!cpus)) - quit(1, "Failed to calloc cpus"); - } - stage_thr_id = mining_threads + 3; thr = &thr_info[stage_thr_id]; thr->q = tq_new(); @@ -6071,87 +6132,42 @@ retry_pools: #ifdef HAVE_OPENCL if (!opt_noadl) init_adl(nDevs); - bool failmessage = false; - - /* start GPU mining threads */ - for (i = 0; i < nDevs * opt_g_threads; i++) { - int gpu = i % nDevs; - struct cgpu_info *cgpu; - struct timeval now; - - gpus[gpu].is_gpu = 1; - gpus[gpu].cpu_gpu = gpu; - - thr = &thr_info[i]; - thr->id = i; - cgpu = thr->cgpu = &gpus[gpu]; - - thr->q = tq_new(); - if (!thr->q) - quit(1, "tq_new failed in starting gpu mining threads"); - - /* Enable threads for devices set not to mine but disable - * their queue in case we wish to enable them later*/ - if (gpu_devices[gpu]) { - if (opt_debug) - applog(LOG_DEBUG, "Pushing ping to thread %d", thr->id); - - tq_push(thr->q, &ping); - } - - applog(LOG_INFO, "Init GPU thread %i", i); - clStates[i] = initCl(gpu, name, sizeof(name)); - if (!clStates[i]) { - enable_curses(); - applog(LOG_ERR, "Failed to init GPU thread %d, disabling device %d", i, gpu); - if (!failmessage) { - char *buf; - - applog(LOG_ERR, "Restarting the GPU from the menu is unlikely to fix this."); - applog(LOG_ERR, "Try stopping other applications using the GPU like afterburner."); - applog(LOG_ERR, "Then restart cgminer."); - failmessage = true; - buf = curses_input("Press enter to continue"); - if (buf) - free(buf); - } - gpu_devices[gpu] = false; - cgpu->status = LIFE_NOSTART; - continue; - } - applog(LOG_INFO, "initCl() finished. 
Found %s", name); - gettimeofday(&now, NULL); - get_datestamp(cgpu->init, &now); - - if (unlikely(thr_info_create(thr, NULL, gpuminer_thread, thr))) - quit(1, "thread %d create failed", i); - } - - applog(LOG_INFO, "%d gpu miner threads started", gpu_threads); #else opt_g_threads = 0; #endif - /* start CPU mining threads */ - for (i = gpu_threads; i < mining_threads; i++) { - int cpu = (i - gpu_threads) % num_processors; + // Start threads + k = 0; + for (i = 0; i < total_devices; ++i) { + struct cgpu_info *cgpu = devices[i]; + for (j = 0; j < cgpu->threads; ++j, ++k) { + thr = &thr_info[k]; + thr->id = k; + thr->cgpu = cgpu; + + thr->q = tq_new(); + if (!thr->q) + quit(1, "tq_new failed in starting %s%d mining thread (#%d)", cgpu->api->name, cgpu->device_id, i); + + /* Enable threads for devices set not to mine but disable + * their queue in case we wish to enable them later */ + if (cgpu->enabled) { + if (opt_debug) + applog(LOG_DEBUG, "Pushing ping to thread %d", thr->id); - thr = &thr_info[i]; - - thr->id = i; - cpus[cpu].cpu_gpu = cpu; - thr->cgpu = &cpus[cpu]; - - thr->q = tq_new(); - if (!thr->q) - quit(1, "tq_new failed in starting cpu mining threads"); + tq_push(thr->q, &ping); + } - thread_reportin(thr); + if (cgpu->api->thread_prepare && !cgpu->api->thread_prepare(thr)) + continue; - if (unlikely(thr_info_create(thr, NULL, miner_thread, thr))) - quit(1, "thread %d create failed", i); + if (unlikely(thr_info_create(thr, NULL, miner_thread, thr))) + quit(1, "thread %d create failed", thr->id); + } } + applog(LOG_INFO, "%d gpu miner threads started", gpu_threads); + applog(LOG_INFO, "%d cpu miner threads started, " "using SHA256 '%s' algorithm.", opt_n_threads, diff --git a/miner.h b/miner.h index bd451401..86352a38 100644 --- a/miner.h +++ b/miner.h @@ -208,9 +208,35 @@ struct gpu_adl { }; #endif +struct cgpu_info; +struct thr_info; +struct work; + +struct device_api { + char*name; + + // API-global functions + void (*api_detect)(); + + // Device-specific functions + void (*reinit_device)(struct cgpu_info*); + void (*get_statline)(char*, struct cgpu_info*); + + // Thread-specific functions + bool (*thread_prepare)(struct thr_info*); + uint64_t (*can_limit_work)(struct thr_info*); + bool (*thread_init)(struct thr_info*); + void (*free_work)(struct thr_info*, struct work*); + bool (*prepare_work)(struct thr_info*, struct work*); + uint64_t (*scanhash)(struct thr_info*, struct work*, uint64_t); + void (*thread_shutdown)(struct thr_info*); +}; + struct cgpu_info { - int is_gpu; - int cpu_gpu; + int cgminer_id; + struct device_api *api; + int device_id; + bool enabled; int accepted; int rejected; int hw_errors; @@ -221,6 +247,8 @@ struct cgpu_info { char init[40]; struct timeval last_message_tv; + int threads; + bool dynamic; int intensity; #ifdef HAVE_ADL @@ -257,6 +285,7 @@ struct thr_info { pthread_t pth; struct thread_q *q; struct cgpu_info *cgpu; + void *cgpu_data; struct timeval last; struct timeval sick; @@ -352,57 +381,62 @@ extern json_t *json_rpc_call(CURL *curl, const char *url, const char *userpass, extern char *bin2hex(const unsigned char *p, size_t len); extern bool hex2bin(unsigned char *p, const char *hexstr, size_t len); -extern unsigned int ScanHash_4WaySSE2(int, const unsigned char *pmidstate, - unsigned char *pdata, unsigned char *phash1, unsigned char *phash, - const unsigned char *ptarget, - uint32_t max_nonce, unsigned long *nHashesDone, uint32_t nonce); - -extern unsigned int ScanHash_altivec_4way(int thr_id, const unsigned char *pmidstate, +typedef bool 
+typedef bool (*sha256_func)(int thr_id, const unsigned char *pmidstate,
 	unsigned char *pdata,
 	unsigned char *phash1, unsigned char *phash,
 	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone, uint32_t nonce);
+	uint32_t max_nonce,
+	uint32_t *last_nonce,
+	uint32_t nonce);
 
-extern unsigned int scanhash_sse2_amd64(int, const unsigned char *pmidstate,
+extern bool ScanHash_4WaySSE2(int, const unsigned char *pmidstate,
 	unsigned char *pdata, unsigned char *phash1, unsigned char *phash,
 	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone);
+	uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
 
-extern bool scanhash_via(int, unsigned char *data_inout,
+extern bool ScanHash_altivec_4way(int thr_id, const unsigned char *pmidstate,
+	unsigned char *pdata,
+	unsigned char *phash1, unsigned char *phash,
+	const unsigned char *ptarget,
+	uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
+
+extern bool scanhash_via(int, const unsigned char *pmidstate,
+	unsigned char *pdata,
+	unsigned char *phash1, unsigned char *phash,
 	const unsigned char *target,
-	uint32_t max_nonce, unsigned long *hashes_done, uint32_t n);
+	uint32_t max_nonce, uint32_t *last_nonce, uint32_t n);
 
 extern bool scanhash_c(int, const unsigned char *midstate, unsigned char *data,
	      unsigned char *hash1, unsigned char *hash,
	      const unsigned char *target,
-	      uint32_t max_nonce, unsigned long *hashes_done, uint32_t n);
+	      uint32_t max_nonce, uint32_t *last_nonce, uint32_t n);
 extern bool scanhash_cryptopp(int, const unsigned char *midstate,unsigned char *data,
	      unsigned char *hash1, unsigned char *hash,
	      const unsigned char *target,
-	      uint32_t max_nonce, unsigned long *hashes_done, uint32_t n);
+	      uint32_t max_nonce, uint32_t *last_nonce, uint32_t n);
 extern bool scanhash_asm32(int, const unsigned char *midstate,unsigned char *data,
	      unsigned char *hash1, unsigned char *hash,
	      const unsigned char *target,
-	      uint32_t max_nonce, unsigned long *hashes_done, uint32_t nonce);
+	      uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce);
 
-extern int scanhash_sse2_64(int, const unsigned char *pmidstate, unsigned char *pdata,
+extern bool scanhash_sse2_64(int, const unsigned char *pmidstate, unsigned char *pdata,
	unsigned char *phash1, unsigned char *phash,
	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone,
+	uint32_t max_nonce, uint32_t *last_nonce,
	uint32_t nonce);
 
-extern int scanhash_sse4_64(int, const unsigned char *pmidstate, unsigned char *pdata,
+extern bool scanhash_sse4_64(int, const unsigned char *pmidstate, unsigned char *pdata,
	unsigned char *phash1, unsigned char *phash,
	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone,
+	uint32_t max_nonce, uint32_t *last_nonce,
	uint32_t nonce);
 
-extern int scanhash_sse2_32(int, const unsigned char *pmidstate, unsigned char *pdata,
+extern bool scanhash_sse2_32(int, const unsigned char *pmidstate, unsigned char *pdata,
	unsigned char *phash1, unsigned char *phash,
	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone,
+	uint32_t max_nonce, uint32_t *last_nonce,
	uint32_t nonce);
 
 extern int
@@ -431,6 +465,7 @@ extern bool gpu_stats(int gpu, float *temp, int *engineclock, int *memclock, flo
 extern void api(void);
 
#define MAX_GPUDEVICES 16
+#define MAX_DEVICES 32
#define MAX_POOLS (32)
 
 extern int nDevs;
@@ -444,9 +479,10 @@ extern struct work_restart *work_restart;
 extern struct cgpu_info gpus[MAX_GPUDEVICES];
 extern int gpu_threads;
 extern double total_secs;
diff --git a/sha256_4way.c b/sha256_4way.c
index c81e05b9..15e852ac 100644
--- a/sha256_4way.c
+++ b/sha256_4way.c
@@ -101,14 +101,16 @@ static const unsigned int pSHA256InitState[8] =
 {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
 
-unsigned int ScanHash_4WaySSE2(int thr_id, const unsigned char *pmidstate,
+bool ScanHash_4WaySSE2(int thr_id, const unsigned char *pmidstate,
 	unsigned char *pdata,
 	unsigned char *phash1, unsigned char *phash,
 	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone,
+	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
-    unsigned int *nNonce_p = (unsigned int*)(pdata + 12);
+    unsigned int *nNonce_p = (unsigned int*)(pdata + 76);
+
+    pdata += 64;
 
     work_restart[thr_id].restart = 0;
 
@@ -132,17 +134,18 @@ unsigned int ScanHash_4WaySSE2(int thr_id, const unsigned char *pmidstate,
 		    ((unsigned int*)phash)[i] = thash[i][j];
 
 		if (fulltest(phash, ptarget)) {
-		    *nHashesDone = nonce;
-		    *nNonce_p = nonce + j;
-		    return nonce + j;
+		    nonce += j;
+		    *last_nonce = nonce;
+		    *nNonce_p = nonce;
+		    return true;
 		}
 	    }
 	}
 
 	if ((nonce >= max_nonce) || work_restart[thr_id].restart) {
-	    *nHashesDone = nonce;
-	    return -1;
+	    *last_nonce = nonce;
+	    return false;
 	}
     }
 }
diff --git a/sha256_altivec_4way.c b/sha256_altivec_4way.c
index b70e4d03..aa50486f 100644
--- a/sha256_altivec_4way.c
+++ b/sha256_altivec_4way.c
@@ -74,14 +74,16 @@ static const unsigned int pSHA256InitState[8] =
 {0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19};
 
-unsigned int ScanHash_altivec_4way(int thr_id, const unsigned char *pmidstate,
+bool ScanHash_altivec_4way(int thr_id, const unsigned char *pmidstate,
 	unsigned char *pdata,
 	unsigned char *phash1, unsigned char *phash,
 	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone,
+	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
-    unsigned int *nNonce_p = (unsigned int*)(pdata + 12);
+    unsigned int *nNonce_p = (unsigned int*)(pdata + 76);
+
+    pdata += 64;
 
     work_restart[thr_id].restart = 0;
 
@@ -104,17 +106,18 @@ unsigned int ScanHash_altivec_4way(int thr_id, const unsigned char *pmidstate,
 		((unsigned int*)phash)[i] = thash[i][j];
 
 	    if (fulltest(phash, ptarget)) {
-		*nHashesDone = nonce;
-		*nNonce_p = nonce + j;
-		return nonce + j;
+		nonce += j;
+		*last_nonce = nonce;
+		*nNonce_p = nonce;
+		return true;
 	    }
 	}
 
 	if ((nonce >= max_nonce) || work_restart[thr_id].restart) {
-	    *nHashesDone = nonce;
-	    return -1;
+	    *last_nonce = nonce;
+	    return false;
 	}
 
 	nonce += NPAR;
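Both 4-way scanners above make the same pair of coordinated edits: the nonce pointer moves from (pdata + 12) to (pdata + 76), and pdata is then advanced by 64. Callers now hand in the start of the full 128-byte work buffer instead of a pointer to its second half; the nonce occupies bytes 76..79 of the 80-byte block header, which is offset 12 within the second 64-byte SHA-256 block, so the two conventions address the same word. A sketch of the equivalence (work_data stands in for the caller's buffer, not a name from the patch):

    /* Sketch: the same four bytes, old and new addressing. */
    unsigned char *pdata = work_data;                      /* full 128-byte buffer */
    uint32_t *nonce_old = (uint32_t *)((pdata + 64) + 12); /* old convention */
    uint32_t *nonce_new = (uint32_t *)(pdata + 76);        /* new convention */
    pdata += 64;  /* hashing code keeps using block-2-relative offsets */

Capturing nNonce_p before the += 64 is what lets the rest of each routine stay untouched.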
diff --git a/sha256_cryptopp.c b/sha256_cryptopp.c
index c0c1b6fd..11c1c5ca 100644
--- a/sha256_cryptopp.c
+++ b/sha256_cryptopp.c
@@ -97,11 +97,13 @@ bool scanhash_cryptopp(int thr_id, const unsigned char *midstate,
 		unsigned char *data,
 		unsigned char *hash1, unsigned char *hash,
 		const unsigned char *target,
-		uint32_t max_nonce, unsigned long *hashes_done,
+		uint32_t max_nonce, uint32_t *last_nonce,
 		uint32_t n)
 {
 	uint32_t *hash32 = (uint32_t *) hash;
-	uint32_t *nonce = (uint32_t *)(data + 12);
+	uint32_t *nonce = (uint32_t *)(data + 76);
+
+	data += 64;
 
 	work_restart[thr_id].restart = 0;
 
@@ -113,12 +115,12 @@ bool scanhash_cryptopp(int thr_id, const unsigned char *midstate,
 		runhash(hash, hash1, sha256_init_state);
 
 		if (unlikely((hash32[7] == 0) && fulltest(hash, target))) {
-			*hashes_done = n;
+			*last_nonce = n;
 			return true;
 		}
 
 		if ((n >= max_nonce) || work_restart[thr_id].restart) {
-			*hashes_done = n;
+			*last_nonce = n;
 			return false;
 		}
 	}
@@ -579,11 +581,13 @@ bool scanhash_asm32(int thr_id, const unsigned char *midstate,
 		unsigned char *data,
 		unsigned char *hash1, unsigned char *hash,
 		const unsigned char *target,
-		uint32_t max_nonce, unsigned long *hashes_done,
+		uint32_t max_nonce, uint32_t *last_nonce,
 		uint32_t n)
 {
 	uint32_t *hash32 = (uint32_t *) hash;
-	uint32_t *nonce = (uint32_t *)(data + 12);
+	uint32_t *nonce = (uint32_t *)(data + 76);
+
+	data += 64;
 
 	work_restart[thr_id].restart = 0;
 
@@ -595,12 +599,12 @@ bool scanhash_asm32(int thr_id, const unsigned char *midstate,
 		runhash32(hash, hash1, sha256_init_state);
 
 		if (unlikely((hash32[7] == 0) && fulltest(hash, target))) {
-			*hashes_done = n;
+			*last_nonce = n;
 			return true;
 		}
 
 		if ((n >= max_nonce) || work_restart[thr_id].restart) {
-			*hashes_done = n;
+			*last_nonce = n;
 			return false;
 		}
 	}
diff --git a/sha256_generic.c b/sha256_generic.c
index 5bc4d972..05f4c376 100644
--- a/sha256_generic.c
+++ b/sha256_generic.c
@@ -242,13 +242,15 @@ const uint32_t sha256_init_state[8] = {
 bool scanhash_c(int thr_id, const unsigned char *midstate, unsigned char *data,
 	      unsigned char *hash1, unsigned char *hash,
 	      const unsigned char *target,
-	      uint32_t max_nonce, unsigned long *hashes_done,
+	      uint32_t max_nonce, uint32_t *last_nonce,
 	      uint32_t n)
 {
 	uint32_t *hash32 = (uint32_t *) hash;
-	uint32_t *nonce = (uint32_t *)(data + 12);
+	uint32_t *nonce = (uint32_t *)(data + 76);
 	unsigned long stat_ctr = 0;
 
+	data += 64;
+
 	work_restart[thr_id].restart = 0;
 
 	while (1) {
@@ -261,12 +263,12 @@ bool scanhash_c(int thr_id, const unsigned char *midstate, unsigned char *data,
 		stat_ctr++;
 
 		if (unlikely((hash32[7] == 0) && fulltest(hash, target))) {
-			*hashes_done = n;
+			*last_nonce = n;
 			return true;
 		}
 
 		if ((n >= max_nonce) || work_restart[thr_id].restart) {
-			*hashes_done = n;
+			*last_nonce = n;
 			return false;
 		}
 	}
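The scalar scanners show the other half of the signature change: the return value is now a plain bool (share found or not), and the out-parameter always reports the last nonce examined instead of doubling as a hash counter. A caller that still wants a hash count can derive one; a sketch, with start_nonce standing for the nonce the call began at (not a variable in this patch):

    /* Sketch: recover the old hashes_done value from last_nonce. */
    uint32_t last_nonce = 0;
    bool found = scanhash_c(thr_id, work->midstate, work->data,
                            work->hash1, work->hash, work->target,
                            max_nonce, &last_nonce, start_nonce);
    uint64_t hashes_done = last_nonce - start_nonce + 1;

This also retires the old overloading in the int-typed scanners, where -1 meant "range exhausted" and any other return value was a winning nonce.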
diff --git a/sha256_sse2_amd64.c b/sha256_sse2_amd64.c
index 561aa3a1..5c82314f 100644
--- a/sha256_sse2_amd64.c
+++ b/sha256_sse2_amd64.c
@@ -50,14 +50,14 @@ const uint32_t sha256_init[8]__attribute__((aligned(0x100))) =
 __m128i g_4sha256_k[64];
 __m128i sha256_consts_m128i[64]__attribute__((aligned(0x1000)));
 
-int scanhash_sse2_64(int thr_id, const unsigned char *pmidstate,
+bool scanhash_sse2_64(int thr_id, const unsigned char *pmidstate,
 	unsigned char *pdata,
 	unsigned char *phash1, unsigned char *phash,
 	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone,
+	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
-    uint32_t *nNonce_p = (uint32_t *)(pdata + 12);
+    uint32_t *nNonce_p = (uint32_t *)(pdata + 76);
     uint32_t m_midstate[8], m_w[16], m_w1[16];
     __m128i m_4w[64] __attribute__ ((aligned (0x100)));
     __m128i m_4hash[64] __attribute__ ((aligned (0x100)));
@@ -65,6 +65,8 @@ int scanhash_sse2_64(int thr_id, const unsigned char *pmidstate,
     __m128i offset;
     int i;
 
+    pdata += 64;
+
     work_restart[thr_id].restart = 0;
 
     /* For debugging */
@@ -114,19 +116,20 @@ int scanhash_sse2_64(int thr_id, const unsigned char *pmidstate,
 	    }
 
 	    if (fulltest(phash, ptarget)) {
-		*nHashesDone = nonce;
-		*nNonce_p = nonce + j;
-		return nonce + j;
+		nonce += j;
+		*last_nonce = nonce + 1;
+		*nNonce_p = nonce;
+		return true;
 	    }
 	}
 
-	nonce += 4;
-
 	if (unlikely((nonce >= max_nonce) || work_restart[thr_id].restart)) {
-	    *nHashesDone = nonce;
-	    return -1;
+	    *last_nonce = nonce;
+	    return false;
 	}
+
+	nonce += 4;
     }
 }
diff --git a/sha256_sse2_i386.c b/sha256_sse2_i386.c
index 72a90c99..321626f0 100644
--- a/sha256_sse2_i386.c
+++ b/sha256_sse2_i386.c
@@ -50,14 +50,14 @@ const uint32_t sha256_32init[8]__attribute__((aligned(0x100))) =
 __m128i g_4sha256_k[64];
 __m128i sha256_consts_m128i[64]__attribute__((aligned(0x1000)));
 
-int scanhash_sse2_32(int thr_id, const unsigned char *pmidstate,
+bool scanhash_sse2_32(int thr_id, const unsigned char *pmidstate,
 	unsigned char *pdata,
 	unsigned char *phash1, unsigned char *phash,
 	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone,
+	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
-    uint32_t *nNonce_p = (uint32_t *)(pdata + 12);
+    uint32_t *nNonce_p = (uint32_t *)(pdata + 76);
     uint32_t m_midstate[8], m_w[16], m_w1[16];
     __m128i m_4w[64] __attribute__ ((aligned (0x100)));
     __m128i m_4hash[64] __attribute__ ((aligned (0x100)));
@@ -65,6 +65,8 @@ int scanhash_sse2_32(int thr_id, const unsigned char *pmidstate,
     __m128i offset;
     int i;
 
+    pdata += 64;
+
     work_restart[thr_id].restart = 0;
 
     /* Message expansion */
@@ -105,20 +107,21 @@ int scanhash_sse2_32(int thr_id, const unsigned char *pmidstate,
 	    }
 
 	    if (fulltest(phash, ptarget)) {
-		*nHashesDone = nonce;
-		*nNonce_p = nonce + j;
-		return nonce + j;
+		nonce += j;
+		*last_nonce = nonce;
+		*nNonce_p = nonce;
+		return true;
 	    }
 	  }
 	}
 
+	if (unlikely((nonce >= max_nonce) || work_restart[thr_id].restart)) {
+	    *last_nonce = nonce;
+	    return false;
+	}
+
 	nonce += 4;
-	if (unlikely((nonce >= max_nonce) || work_restart[thr_id].restart))
-	{
-	    *nHashesDone = nonce;
-	    return -1;
-	}
     }
 }
diff --git a/sha256_sse4_amd64.c b/sha256_sse4_amd64.c
index f150e8c6..67e0604b 100644
--- a/sha256_sse4_amd64.c
+++ b/sha256_sse4_amd64.c
@@ -49,19 +49,21 @@ static uint32_t g_sha256_hinit[8] =
 
 __m128i g_4sha256_k[64];
 
-int scanhash_sse4_64(int thr_id, const unsigned char *pmidstate,
+bool scanhash_sse4_64(int thr_id, const unsigned char *pmidstate,
 	unsigned char *pdata,
 	unsigned char *phash1, unsigned char *phash,
 	const unsigned char *ptarget,
-	uint32_t max_nonce, unsigned long *nHashesDone,
+	uint32_t max_nonce, uint32_t *last_nonce,
 	uint32_t nonce)
 {
-    uint32_t *nNonce_p = (uint32_t *)(pdata + 12);
+    uint32_t *nNonce_p = (uint32_t *)(pdata + 76);
     uint32_t m_midstate[8], m_w[16], m_w1[16];
     __m128i m_4w[64], m_4hash[64], m_4hash1[64];
     __m128i offset;
     int i;
 
+    pdata += 64;
+
     work_restart[thr_id].restart = 0;
 
     /* For debugging */
@@ -113,19 +115,20 @@ int scanhash_sse4_64(int thr_id, const unsigned char *pmidstate,
 	    }
 
 	    if (fulltest(phash, ptarget)) {
-		*nHashesDone = nonce;
-		*nNonce_p = nonce + j;
-		return nonce + j;
+		nonce += j;
+		*last_nonce = nonce;
+		*nNonce_p = nonce;
+		return true;
 	    }
 	}
 
-	nonce += 4;
-
 	if (unlikely((nonce >= max_nonce) || work_restart[thr_id].restart)) {
-	    *nHashesDone = nonce;
-	    return -1;
+	    *last_nonce = nonce;
+	    return false;
 	}
+
+	nonce += 4;
     }
 }
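One subtlety in the three SSE rewrites: the exit test now runs before nonce += 4 rather than after it, so on a restart or max_nonce exit *last_nonce names a nonce that was actually hashed instead of one vector-width beyond it. Schematically (restart stands for the work_restart check):

    /* Old tail: advance, then test; last_nonce overshot by 4. */
    nonce += 4;
    if (nonce >= max_nonce || restart) { *last_nonce = nonce; return false; }

    /* New tail: test, then advance; last_nonce is a hashed nonce. */
    if (nonce >= max_nonce || restart) { *last_nonce = nonce; return false; }
    nonce += 4;

The amd64 version is the one asymmetry here: on the success path it stores nonce + 1 where the other scanners store nonce.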
diff --git a/sha256_via.c b/sha256_via.c
index fdf0045e..550807cb 100644
--- a/sha256_via.c
+++ b/sha256_via.c
@@ -19,9 +19,11 @@ static void via_sha256(void *hash, void *buf, unsigned len)
 		:"memory");
 }
 
-bool scanhash_via(int thr_id, unsigned char *data_inout,
-		  const unsigned char *target,
-		  uint32_t max_nonce, unsigned long *hashes_done,
+bool scanhash_via(int thr_id, const unsigned char *pmidstate,
+		  unsigned char *data_inout,
+		  unsigned char *phash1, unsigned char *phash,
+		  const unsigned char *target,
+		  uint32_t max_nonce, uint32_t *last_nonce,
 		  uint32_t n)
 {
 	unsigned char data[128] __attribute__((aligned(128)));
@@ -70,12 +72,12 @@ bool scanhash_via(int thr_id, unsigned char *data_inout,
 			dout32[i] = swab32(data32[i]);
 		}
 
-		*hashes_done = n;
+		*last_nonce = n;
 		return true;
 	}
 
 	if ((n >= max_nonce) || work_restart[thr_id].restart) {
-		*hashes_done = n;
+		*last_nonce = n;
 		return false;
 	}
 }
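scanhash_via gains pmidstate/phash1/phash parameters it never reads; the point is purely signature uniformity, so the VIA PadLock path now matches the sha256_func typedef from miner.h and can be dispatched through the same function-pointer type as every other scanner. A sketch of the resulting uniform call, with the extra arguments passed but ignored (thr/work fields as in the earlier examples above):

    /* Sketch: VIA is called exactly like any other scanner; the
     * midstate/hash1/hash arguments are accepted and ignored. */
    uint32_t last_nonce = 0;
    bool found = scanhash_via(thr->id, work->midstate, work->data,
                              work->hash1, work->hash, work->target,
                              max_nonce, &last_nonce, work->blk.nonce);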