diff --git a/Makefile.am b/Makefile.am index 3079f6d4..bd350bce 100644 --- a/Makefile.am +++ b/Makefile.am @@ -57,29 +57,6 @@ if HAS_SCRYPT cgminer_SOURCES += scrypt.c scrypt.h endif -if HAS_CPUMINE -# original CPU related sources, unchanged -cgminer_SOURCES += \ - sha256_generic.c sha256_4way.c sha256_via.c \ - sha256_cryptopp.c sha256_sse2_amd64.c \ - sha256_sse4_amd64.c sha256_sse2_i386.c \ - sha256_altivec_4way.c - -# the CPU portion extracted from original main.c -cgminer_SOURCES += driver-cpu.h driver-cpu.c - -if HAS_YASM -AM_CFLAGS = -DHAS_YASM -if HAVE_x86_64 -SUBDIRS += x86_64 -cgminer_LDADD += x86_64/libx8664.a -else # HAVE_x86_64 -SUBDIRS += x86_32 -cgminer_LDADD += x86_32/libx8632.a -endif # HAVE_x86_64 -endif # HAS_YASM -endif # HAS_CPUMINE - if NEED_FPGAUTILS cgminer_SOURCES += fpgautils.c fpgautils.h endif diff --git a/api.c b/api.c index 24dcb589..f8488f7c 100644 --- a/api.c +++ b/api.c @@ -27,7 +27,6 @@ #include "compat.h" #include "miner.h" #include "util.h" -#include "driver-cpu.h" /* for algo_names[], TODO: re-factor dependency */ #if defined(USE_BFLSC) || defined(USE_AVALON) #define HAVE_AN_ASIC 1 @@ -38,7 +37,7 @@ #endif // Big enough for largest API request -// though a PC with 100s of PGAs/CPUs may exceed the size ... +// though a PC with 100s of PGAs may exceed the size ... 
// data is truncated at the end of the last record that fits // but still closed correctly for JSON // Current code assumes it can socket send this size + JSON_CLOSE + JSON_END @@ -187,9 +186,6 @@ static const char *DEVICECODE = "" #endif #ifdef USE_MODMINER "MMQ " -#endif -#ifdef WANT_CPUMINE - "CPU " #endif ""; @@ -224,13 +220,8 @@ static const char *OSINFO = #define _PGA "PGA" #endif -#ifdef WANT_CPUMINE -#define _CPU "CPU" -#endif - #define _GPUS "GPUS" #define _PGAS "PGAS" -#define _CPUS "CPUS" #define _NOTIFY "NOTIFY" #define _DEVDETAILS "DEVDETAILS" #define _BYE "BYE" @@ -265,13 +256,8 @@ static const char ISJSON = '{'; #define JSON_PGA JSON1 _PGA JSON2 #endif -#ifdef WANT_CPUMINE -#define JSON_CPU JSON1 _CPU JSON2 -#endif - #define JSON_GPUS JSON1 _GPUS JSON2 #define JSON_PGAS JSON1 _PGAS JSON2 -#define JSON_CPUS JSON1 _CPUS JSON2 #define JSON_NOTIFY JSON1 _NOTIFY JSON2 #define JSON_DEVDETAILS JSON1 _DEVDETAILS JSON2 #define JSON_BYE JSON1 _BYE JSON1 @@ -306,14 +292,8 @@ static const char *JSON_PARAMETER = "parameter"; #define MSG_MISID 15 #define MSG_GPUDEV 17 -#ifdef WANT_CPUMINE -#define MSG_CPUNON 16 -#define MSG_CPUDEV 18 -#define MSG_INVCPU 19 -#endif - #define MSG_NUMGPU 20 -#define MSG_NUMCPU 21 + #define MSG_VERSION 22 #define MSG_INVJSON 23 #define MSG_MISCMD 24 @@ -420,11 +400,9 @@ enum code_severity { enum code_parameters { PARAM_GPU, PARAM_PGA, - PARAM_CPU, PARAM_PID, PARAM_GPUMAX, PARAM_PGAMAX, - PARAM_CPUMAX, PARAM_PMAX, PARAM_POOLMAX, @@ -474,11 +452,8 @@ struct CODES { #ifdef HAVE_AN_FPGA "%d PGA(s)" #endif -#if defined(WANT_CPUMINE) && (defined(HAVE_OPENCL) || defined(HAVE_AN_ASIC) || defined(HAVE_AN_FPGA)) +#if (defined(HAVE_OPENCL) || defined(HAVE_AN_ASIC) || defined(HAVE_AN_FPGA)) " - " -#endif -#ifdef WANT_CPUMINE - "%d CPU(s)" #endif }, @@ -488,9 +463,6 @@ struct CODES { #endif #ifdef HAVE_AN_FPGA "/PGAs" -#endif -#ifdef WANT_CPUMINE - "/CPUs" #endif }, @@ -513,15 +485,9 @@ struct CODES { { SEVERITY_INFO, MSG_PGAENA, PARAM_PGA, "PGA 
%d sent enable message" }, { SEVERITY_INFO, MSG_PGADIS, PARAM_PGA, "PGA %d set disable flag" }, { SEVERITY_ERR, MSG_PGAUNW, PARAM_PGA, "PGA %d is not flagged WELL, cannot enable" }, -#endif -#ifdef WANT_CPUMINE - { SEVERITY_ERR, MSG_CPUNON, PARAM_NONE, "No CPUs" }, - { SEVERITY_SUCC, MSG_CPUDEV, PARAM_CPU, "CPU%d" }, - { SEVERITY_ERR, MSG_INVCPU, PARAM_CPUMAX, "Invalid CPU id %d - range is 0 - %d" }, #endif { SEVERITY_SUCC, MSG_NUMGPU, PARAM_NONE, "GPU count" }, { SEVERITY_SUCC, MSG_NUMPGA, PARAM_NONE, "PGA count" }, - { SEVERITY_SUCC, MSG_NUMCPU, PARAM_NONE, "CPU count" }, { SEVERITY_SUCC, MSG_VERSION, PARAM_NONE, "CGMiner versions" }, { SEVERITY_ERR, MSG_INVJSON, PARAM_NONE, "Invalid JSON" }, { SEVERITY_ERR, MSG_MISCMD, PARAM_CMD, "Missing JSON '%s'" }, @@ -1300,9 +1266,6 @@ static void message(struct io_data *io_data, int messageid, int paramid, char *p #endif #ifdef HAVE_AN_FPGA int pga; -#endif -#ifdef WANT_CPUMINE - int cpu; #endif int i; @@ -1333,7 +1296,6 @@ static void message(struct io_data *io_data, int messageid, int paramid, char *p switch(codes[i].params) { case PARAM_GPU: case PARAM_PGA: - case PARAM_CPU: case PARAM_PID: case PARAM_INT: sprintf(buf, codes[i].description, paramid); @@ -1351,15 +1313,6 @@ static void message(struct io_data *io_data, int messageid, int paramid, char *p pga = numpgas(); sprintf(buf, codes[i].description, paramid, pga - 1); break; -#endif -#ifdef WANT_CPUMINE - case PARAM_CPUMAX: - if (opt_n_threads > 0) - cpu = num_processors; - else - cpu = 0; - sprintf(buf, codes[i].description, paramid, cpu - 1); - break; #endif case PARAM_PMAX: sprintf(buf, codes[i].description, total_pools); @@ -1374,12 +1327,6 @@ static void message(struct io_data *io_data, int messageid, int paramid, char *p #ifdef HAVE_AN_FPGA pga = numpgas(); #endif -#ifdef WANT_CPUMINE - if (opt_n_threads > 0) - cpu = num_processors; - else - cpu = 0; -#endif sprintf(buf, codes[i].description #ifdef HAVE_OPENCL @@ -1390,9 +1337,6 @@ static void message(struct 
io_data *io_data, int messageid, int paramid, char *p #endif #ifdef HAVE_AN_FPGA , pga -#endif -#ifdef WANT_CPUMINE - , cpu #endif ); break; @@ -1470,7 +1414,6 @@ static void minerconfig(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __ int gpucount = 0; int asccount = 0; int pgacount = 0; - int cpucount = 0; char *adlinuse = (char *)NO; #ifdef HAVE_ADL const char *adl = YES; @@ -1498,17 +1441,12 @@ static void minerconfig(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __ pgacount = numpgas(); #endif -#ifdef WANT_CPUMINE - cpucount = opt_n_threads > 0 ? num_processors : 0; -#endif - message(io_data, MSG_MINECONFIG, 0, NULL, isjson); io_open = io_add(io_data, isjson ? COMSTR JSON_MINECONFIG : _MINECONFIG COMSTR); root = api_add_int(root, "GPU Count", &gpucount, false); root = api_add_int(root, "ASC Count", &asccount, false); root = api_add_int(root, "PGA Count", &pgacount, false); - root = api_add_int(root, "CPU Count", &cpucount, false); root = api_add_int(root, "Pool Count", &total_pools, false); root = api_add_const(root, "ADL", (char *)adl, false); root = api_add_string(root, "ADL in use", adlinuse, false); @@ -1759,43 +1697,6 @@ static void pgastatus(struct io_data *io_data, int pga, bool isjson, bool precom } #endif -#ifdef WANT_CPUMINE -static void cpustatus(struct io_data *io_data, int cpu, bool isjson, bool precom) -{ - struct api_data *root = NULL; - char buf[TMPBUFSIZ]; - - if (opt_n_threads > 0 && cpu >= 0 && cpu < num_processors) { - struct cgpu_info *cgpu = &cpus[cpu]; - - cgpu->utility = cgpu->accepted / ( total_secs ? 
total_secs : 1 ) * 60; - - root = api_add_int(root, "CPU", &cpu, false); - double mhs = cgpu->total_mhashes / total_secs; - root = api_add_mhs(root, "MHS av", &mhs, false); - char mhsname[27]; - sprintf(mhsname, "MHS %ds", opt_log_interval); - root = api_add_mhs(root, mhsname, &(cgpu->rolling), false); - root = api_add_int(root, "Accepted", &(cgpu->accepted), false); - root = api_add_int(root, "Rejected", &(cgpu->rejected), false); - root = api_add_utility(root, "Utility", &(cgpu->utility), false); - int last_share_pool = cgpu->last_share_pool_time > 0 ? - cgpu->last_share_pool : -1; - root = api_add_int(root, "Last Share Pool", &last_share_pool, false); - root = api_add_time(root, "Last Share Time", &(cgpu->last_share_pool_time), false); - root = api_add_mhtotal(root, "Total MH", &(cgpu->total_mhashes), false); - root = api_add_int(root, "Diff1 Work", &(cgpu->diff1), false); - root = api_add_diff(root, "Difficulty Accepted", &(cgpu->diff_accepted), false); - root = api_add_diff(root, "Difficulty Rejected", &(cgpu->diff_rejected), false); - root = api_add_diff(root, "Last Share Difficulty", &(cgpu->last_share_diff), false); - root = api_add_time(root, "Last Valid Work", &(cgpu->last_device_valid_work), false); - - root = print_data(root, buf, isjson, precom); - io_add(io_data, buf); - } -} -#endif - static void devstatus(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __maybe_unused char *param, bool isjson, __maybe_unused char group) { bool io_open = false; @@ -1817,7 +1718,7 @@ static void devstatus(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __ma numpga = numpgas(); #endif - if (numgpu == 0 && opt_n_threads == 0 && numpga == 0 && numasc == 0) { + if (numgpu == 0 && numpga == 0 && numasc == 0) { message(io_data, MSG_NODEVS, 0, NULL, isjson); return; } @@ -1854,16 +1755,6 @@ static void devstatus(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __ma } #endif -#ifdef WANT_CPUMINE - if (opt_n_threads > 0) { - for (i = 0; i < num_processors; i++) 
{ - cpustatus(io_data, i, isjson, isjson && devcount > 0); - - devcount++; - } - } -#endif - if (isjson && io_open) io_close(io_data); } @@ -2088,40 +1979,6 @@ static void pgaidentify(struct io_data *io_data, __maybe_unused SOCKETTYPE c, ch } #endif -#ifdef WANT_CPUMINE -static void cpudev(struct io_data *io_data, __maybe_unused SOCKETTYPE c, char *param, bool isjson, __maybe_unused char group) -{ - bool io_open = false; - int id; - - if (opt_n_threads == 0) { - message(io_data, MSG_CPUNON, 0, NULL, isjson); - return; - } - - if (param == NULL || *param == '\0') { - message(io_data, MSG_MISID, 0, NULL, isjson); - return; - } - - id = atoi(param); - if (id < 0 || id >= num_processors) { - message(io_data, MSG_INVCPU, id, NULL, isjson); - return; - } - - message(io_data, MSG_CPUDEV, id, NULL, isjson); - - if (isjson) - io_open = io_add(io_data, COMSTR JSON_CPU); - - cpustatus(io_data, id, isjson, false); - - if (isjson && io_open) - io_close(io_data); -} -#endif - static void poolstatus(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __maybe_unused char *param, bool isjson, __maybe_unused char group) { struct api_data *root = NULL; @@ -2219,12 +2076,6 @@ static void summary(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __mayb bool io_open; double utility, mhs, work_utility; -#ifdef WANT_CPUMINE - char *algo = (char *)(algo_names[opt_algo]); - if (algo == NULL) - algo = (char *)NULLSTR; -#endif - message(io_data, MSG_SUMM, 0, NULL, isjson); io_open = io_add(io_data, isjson ? COMSTR JSON_SUMMARY : _SUMMARY COMSTR); @@ -2236,9 +2087,6 @@ static void summary(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __mayb work_utility = total_diff1 / ( total_secs ? 
total_secs : 1 ) * 60; root = api_add_elapsed(root, "Elapsed", &(total_secs), true); -#ifdef WANT_CPUMINE - root = api_add_string(root, "Algorithm", algo, false); -#endif root = api_add_mhs(root, "MHS av", &(mhs), false); root = api_add_uint(root, "Found Blocks", &(found_blocks), true); root = api_add_int(root, "Getworks", &(total_getworks), true); @@ -2419,28 +2267,6 @@ static void pgacount(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __may io_close(io_data); } -static void cpucount(struct io_data *io_data, __maybe_unused SOCKETTYPE c, __maybe_unused char *param, bool isjson, __maybe_unused char group) -{ - struct api_data *root = NULL; - char buf[TMPBUFSIZ]; - bool io_open; - int count = 0; - -#ifdef WANT_CPUMINE - count = opt_n_threads > 0 ? num_processors : 0; -#endif - - message(io_data, MSG_NUMCPU, 0, NULL, isjson); - io_open = io_add(io_data, isjson ? COMSTR JSON_CPUS : _CPUS COMSTR); - - root = api_add_int(root, "Count", &count, false); - - root = print_data(root, buf, isjson, false); - io_add(io_data, buf); - if (isjson && io_open) - io_close(io_data); -} - static void switchpool(struct io_data *io_data, __maybe_unused SOCKETTYPE c, char *param, bool isjson, __maybe_unused char group) { struct pool *pool; @@ -3540,13 +3366,9 @@ struct CMDS { { "pgaenable", pgaenable, true }, { "pgadisable", pgadisable, true }, { "pgaidentify", pgaidentify, true }, -#endif -#ifdef WANT_CPUMINE - { "cpu", cpudev, false }, #endif { "gpucount", gpucount, false }, { "pgacount", pgacount, false }, - { "cpucount", cpucount, false }, { "switchpool", switchpool, true }, { "addpool", addpool, true }, { "poolpriority", poolpriority, true }, diff --git a/cgminer.c b/cgminer.c index ec61e5e3..46ea2dd9 100644 --- a/cgminer.c +++ b/cgminer.c @@ -44,7 +44,6 @@ #include "miner.h" #include "findnonce.h" #include "adl.h" -#include "driver-cpu.h" #include "driver-opencl.h" #include "bench_block.h" #include "scrypt.h" @@ -89,7 +88,6 @@ int opt_log_interval = 5; int opt_queue = 1; int 
opt_scantime = 60; int opt_expiry = 120; -int opt_bench_algo = -1; static const bool opt_time = true; unsigned long long global_hashrate; @@ -113,7 +111,6 @@ static bool opt_removedisabled; int total_devices; struct cgpu_info **devices; bool have_opencl; -int opt_n_threads = -1; int mining_threads; int num_processors; #ifdef HAVE_CURSES @@ -869,36 +866,6 @@ static char *set_null(const char __maybe_unused *arg) /* These options are available from config file or commandline */ static struct opt_table opt_config_table[] = { -#ifdef WANT_CPUMINE - OPT_WITH_ARG("--algo|-a", - set_algo, show_algo, &opt_algo, - "Specify sha256 implementation for CPU mining:\n" - "\tauto\t\tBenchmark at startup and pick fastest algorithm" - "\n\tc\t\tLinux kernel sha256, implemented in C" -#ifdef WANT_SSE2_4WAY - "\n\t4way\t\ttcatm's 4-way SSE2 implementation" -#endif -#ifdef WANT_VIA_PADLOCK - "\n\tvia\t\tVIA padlock implementation" -#endif - "\n\tcryptopp\tCrypto++ C/C++ implementation" -#ifdef WANT_CRYPTOPP_ASM32 - "\n\tcryptopp_asm32\tCrypto++ 32-bit assembler implementation" -#endif -#ifdef WANT_X8632_SSE2 - "\n\tsse2_32\t\tSSE2 32 bit implementation for i386 machines" -#endif -#ifdef WANT_X8664_SSE2 - "\n\tsse2_64\t\tSSE2 64 bit implementation for x86_64 machines" -#endif -#ifdef WANT_X8664_SSE4 - "\n\tsse4_64\t\tSSE4.1 64 bit implementation for x86_64 machines" -#endif -#ifdef WANT_ALTIVEC_4WAY - "\n\taltivec_4way\tAltivec implementation for PowerPC G4 and G5 machines" -#endif - ), -#endif OPT_WITH_ARG("--api-allow", set_api_allow, NULL, NULL, "Allow API access only to the given list of [G:]IP[/Prefix] addresses[/subnets]"), @@ -936,20 +903,10 @@ static struct opt_table opt_config_table[] = { opt_set_bool, &opt_bfl_noncerange, "Use nonce range on bitforce devices if supported"), #endif -#ifdef WANT_CPUMINE - OPT_WITH_ARG("--bench-algo|-b", - set_int_0_to_9999, opt_show_intval, &opt_bench_algo, - opt_hidden), -#endif #ifdef HAVE_CURSES OPT_WITHOUT_ARG("--compact", opt_set_bool, 
&opt_compact, "Use compact display without per device statistics"), -#endif -#ifdef WANT_CPUMINE - OPT_WITH_ARG("--cpu-threads|-t", - force_nthreads_int, opt_show_intval, &opt_n_threads, - "Number of miner CPU threads"), #endif OPT_WITHOUT_ARG("--debug|-D", enable_debug, &opt_debug, @@ -968,11 +925,6 @@ static struct opt_table opt_config_table[] = { OPT_WITHOUT_ARG("--disable-rejecting", opt_set_bool, &opt_disable_pool, "Automatically disable pools that continually reject shares"), -#if defined(WANT_CPUMINE) && (defined(HAVE_OPENCL) || defined(USE_FPGA)) - OPT_WITHOUT_ARG("--enable-cpu|-C", - opt_set_bool, &opt_usecpu, - "Enable CPU mining with other mining (default: no CPU mining if other devices exist)"), -#endif OPT_WITH_ARG("--expiry|-E", set_int_0_to_9999, opt_show_intval, &opt_expiry, "Upper bound on how many seconds after getting work we consider a share from it stale"), @@ -1375,9 +1327,6 @@ static char *opt_verusage_and_exit(const char *extra) #ifdef HAVE_OPENCL "GPU " #endif -#ifdef WANT_CPUMINE - "CPU " -#endif #ifdef USE_BITFORCE "bitforce " #endif @@ -1905,7 +1854,6 @@ static int statusy; #ifdef HAVE_OPENCL struct cgpu_info gpus[MAX_GPUDEVICES]; /* Maximum number apparently possible */ #endif -struct cgpu_info *cpus; #ifdef HAVE_CURSES static inline void unlock_curses(void) @@ -2039,10 +1987,6 @@ static void curses_print_status(void) wattron(statuswin, A_BOLD); mvwprintw(statuswin, 0, 0, " " PACKAGE " version " VERSION " - Started: %s", datestamp); -#ifdef WANT_CPUMINE - if (opt_n_threads) - wprintw(statuswin, " CPU Algo: %s", algo_names[opt_algo]); -#endif wattroff(statuswin, A_BOLD); mvwhline(statuswin, 1, 0, '-', 80); mvwprintw(statuswin, 2, 0, " %s", statusline); @@ -4059,9 +4003,6 @@ void write_config(FILE *fcfg) if (opt_reorder) fprintf(fcfg, ",\n\"gpu-reorder\" : true"); #endif -#ifdef WANT_CPUMINE - fprintf(fcfg, ",\n\"algo\" : \"%s\"", algo_names[opt_algo]); -#endif /* Simple bool and int options */ struct opt_table *opt; @@ -6382,10 +6323,6 
@@ static void *watchdog_thread(void __maybe_unused *userdata) if (thr->getwork || *denable == DEV_DISABLED) continue; -#ifdef WANT_CPUMINE - if (cgpu->drv->drv_id == DRIVER_CPU) - continue; -#endif if (cgpu->status != LIFE_WELL && (now.tv_sec - thr->last.tv_sec < WATCHDOG_SICK_TIME)) { if (cgpu->status != LIFE_INIT) applog(LOG_ERR, "%s: Recovered, declaring WELL!", dev_str); @@ -6459,10 +6396,6 @@ void print_summary(void) applog(LOG_WARNING, "Started at %s", datestamp); if (total_pools == 1) applog(LOG_WARNING, "Pool: %s", pools[0]->rpc_url); -#ifdef WANT_CPUMINE - if (opt_n_threads) - applog(LOG_WARNING, "CPU hasher algorithm used: %s", algo_names[opt_algo]); -#endif applog(LOG_WARNING, "Runtime: %d hrs : %d mins : %d secs", hours, mins, secs); displayed_hashes = total_mhashes_done / total_secs; if (displayed_hashes < 1) { @@ -6542,9 +6475,6 @@ static void clean_up(void) if (!opt_realquiet && successful_connect) print_summary(); - if (opt_n_threads) - free(cpus); - curl_global_cleanup(); } @@ -6798,15 +6728,6 @@ void enable_curses(void) { } #endif -/* TODO: fix need a dummy CPU device_drv even if no support for CPU mining */ -#ifndef WANT_CPUMINE -struct device_drv cpu_drv; -struct device_drv cpu_drv = { - .drv_id = DRIVER_CPU, - .name = "CPU", -}; -#endif - #ifdef USE_BFLSC extern struct device_drv bflsc_drv; #endif @@ -7183,10 +7104,6 @@ int main(int argc, char *argv[]) sprintf(packagename, "%s %s", PACKAGE, VERSION); -#ifdef WANT_CPUMINE - init_max_name_len(); -#endif - handler.sa_handler = &sighandler; handler.sa_flags = 0; sigemptyset(&handler.sa_mask); @@ -7202,15 +7119,6 @@ int main(int argc, char *argv[]) strcpy(cgminer_path, dirname(s)); free(s); strcat(cgminer_path, "/"); -#ifdef WANT_CPUMINE - // Hack to make cgminer silent when called recursively on WIN32 - int skip_to_bench = 0; - #if defined(WIN32) - char buf[32]; - if (GetEnvironmentVariable("CGMINER_BENCH_ALGO", buf, 16)) - skip_to_bench = 1; - #endif // defined(WIN32) -#endif devcursor = 8; 
logstart = devcursor + 1; @@ -7295,51 +7203,6 @@ int main(int argc, char *argv[]) usb_initialise(); #endif -#ifdef WANT_CPUMINE -#ifdef USE_SCRYPT - if (opt_scrypt) - set_scrypt_algo(&opt_algo); - else -#endif - if (0 <= opt_bench_algo) { - double rate = bench_algo_stage3(opt_bench_algo); - - if (!skip_to_bench) - printf("%.5f (%s)\n", rate, algo_names[opt_bench_algo]); - else { - // Write result to shared memory for parent -#if defined(WIN32) - char unique_name[64]; - - if (GetEnvironmentVariable("CGMINER_SHARED_MEM", unique_name, 32)) { - HANDLE map_handle = CreateFileMapping( - INVALID_HANDLE_VALUE, // use paging file - NULL, // default security attributes - PAGE_READWRITE, // read/write access - 0, // size: high 32-bits - 4096, // size: low 32-bits - unique_name // name of map object - ); - if (NULL != map_handle) { - void *shared_mem = MapViewOfFile( - map_handle, // object to map view of - FILE_MAP_WRITE, // read/write access - 0, // high offset: map from - 0, // low offset: beginning - 0 // default: map entire file - ); - if (NULL != shared_mem) - CopyMemory(shared_mem, &rate, sizeof(rate)); - (void)UnmapViewOfFile(shared_mem); - } - (void)CloseHandle(map_handle); - } -#endif - } - exit(0); - } -#endif - #ifdef HAVE_OPENCL if (!opt_nogpu) opencl_drv.drv_detect(); @@ -7376,10 +7239,6 @@ int main(int argc, char *argv[]) ztex_drv.drv_detect(); #endif -#ifdef WANT_CPUMINE - cpu_drv.drv_detect(); -#endif - if (devices_enabled == -1) { applog(LOG_ERR, "Devices detected:"); for (i = 0; i < total_devices; ++i) { @@ -7400,12 +7259,8 @@ int main(int argc, char *argv[]) quit (1, "Command line options set a device that doesn't exist"); enable_device(devices[i]); } else if (i < total_devices) { - if (opt_removedisabled) { - if (devices[i]->drv->drv_id == DRIVER_CPU) - --opt_n_threads; - } else { + if (!opt_removedisabled) enable_device(devices[i]); - } devices[i]->deven = DEV_DISABLED; } } @@ -7609,13 +7464,6 @@ begin_bench: pause_dynamic_threads(i); #endif -#ifdef 
WANT_CPUMINE - applog(LOG_INFO, "%d cpu miner threads started, " - "using SHA256 '%s' algorithm.", - opt_n_threads, - algo_names[opt_algo]); -#endif - cgtime(&total_tv_start); cgtime(&total_tv_end); diff --git a/configure.ac b/configure.ac index 4fb74a19..b09edcbc 100644 --- a/configure.ac +++ b/configure.ac @@ -126,14 +126,6 @@ if test -n "$CGMINER_SDK"; then LDFLAGS="-L$CGMINER_SDK/lib/$target $LDFLAGS" fi -cpumining="no" - -AC_ARG_ENABLE([cpumining],,[cpumining=$enableval] ) -if test "x$cpumining" = xyes; then - AC_DEFINE_UNQUOTED([WANT_CPUMINE], [1], [Enable CPUMINING]) -fi -AM_CONDITIONAL([HAS_CPUMINE], [test x$cpumining = xyes]) - opencl="yes" AC_ARG_ENABLE([opencl], @@ -329,54 +321,6 @@ else JANSSON_LIBS=-ljansson fi -dnl Find YASM -has_yasm=false -AC_PATH_PROG([YASM],[yasm],[false]) -if test "x$YASM" != "xfalse" ; then - AC_MSG_CHECKING([if yasm version is greater than 1.0.1]) - yasmver=`"$YASM" --version | head -1 | cut -d\ -f2` - yamajor=`echo $yasmver | cut -d. -f1` - yaminor=`echo $yasmver | cut -d. -f2` - yamini=`echo $yasmver | cut -d. -f3` - if test "$yamajor" -ge "1" ; then - if test "$yamajor" -eq "1" ; then - if test "$yaminor" -ge "0" ; then - if test "$yaminor" -eq "0"; then - if test "$yamini" -ge "1"; then - has_yasm=true - fi - else - has_yasm=true - fi - fi - fi - else - has_yasm=false - fi - if test "x$has_yasm" = "xtrue" ; then - AC_MSG_RESULT([yes]) - else - AC_MSG_RESULT([no]) - fi -fi -if test "x$has_yasm" = "xfalse" ; then - AC_MSG_NOTICE([yasm is required for the assembly algorithms. 
They will be skipped.]) -else - if test "x$have_x86_64" = xtrue; then - if test "x$have_win32" = xtrue; then - YASM_FMT="win64" - else - YASM_FMT="elf64" - fi - elif test "x$have_win32" = xtrue; then - YASM_FMT="coff" - else - YASM_FMT="elf32" - fi -fi - -AM_CONDITIONAL([HAS_YASM], [test x$has_yasm = xtrue]) - if test "x$icarus" != xno; then AC_ARG_WITH([libudev], [AC_HELP_STRING([--without-libudev], [Autodetect FPGAs using libudev (default enabled)])], [libudev=$withval], @@ -498,15 +442,12 @@ AC_SUBST(WS2_LIBS) AC_SUBST(MM_LIBS) AC_SUBST(MATH_LIBS) AC_SUBST(UDEV_LIBS) -AC_SUBST(YASM_FMT) AC_SUBST(ADL_CPPFLAGS) AC_CONFIG_FILES([ Makefile compat/Makefile compat/jansson/Makefile - x86_64/Makefile - x86_32/Makefile ccan/Makefile lib/Makefile ]) @@ -537,14 +478,14 @@ if test "x$opencl" != xno; then else echo " OpenCL...............: NOT FOUND. GPU mining support DISABLED" - if test "x$cpumining$bitforce$avalon$icarus$ztex$modminer$bflsc" = xnonononononono; then + if test "x$bitforce$avalon$icarus$ztex$modminer$bflsc" = xnonononononono; then AC_MSG_ERROR([No mining configured in]) fi echo " scrypt...............: Disabled (needs OpenCL)" fi else echo " OpenCL...............: Detection overrided. 
GPU mining support DISABLED" - if test "x$cpumining$bitforce$icarus$avalon$ztex$modminer$bflsc" = xnonononononono; then + if test "x$bitforce$icarus$avalon$ztex$modminer$bflsc" = xnonononononono; then AC_MSG_ERROR([No mining configured in]) fi echo " scrypt...............: Disabled (needs OpenCL)" @@ -601,12 +542,6 @@ if test "x$icarus" != xno; then echo " libudev.detection....: $libudev" fi -if test "x$cpumining" = xyes; then - echo - echo " CPU Mining...........: Enabled" - echo " ASM.(for CPU mining).: $has_yasm" -fi - echo echo "Compilation............: make (or gmake)" echo " CPPFLAGS.............: $CPPFLAGS" diff --git a/driver-cpu.c b/driver-cpu.c deleted file mode 100644 index 8f8b0d1f..00000000 --- a/driver-cpu.c +++ /dev/null @@ -1,863 +0,0 @@ -/* - * Copyright 2011-2012 Con Kolivas - * Copyright 2011-2012 Luke Dashjr - * Copyright 2010 Jeff Garzik - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 3 of the License, or (at your option) - * any later version. See COPYING for more details. 
- */ - -#include "config.h" - - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#ifndef WIN32 -#include -#include -#endif -#include - -#include "compat.h" -#include "miner.h" -#include "bench_block.h" -#include "driver-cpu.h" - -#if defined(unix) - #include - #include -#endif - -#if defined(__linux) && defined(cpu_set_t) /* Linux specific policy and affinity management */ -#include -static inline void drop_policy(void) -{ - struct sched_param param; - -#ifdef SCHED_BATCH -#ifdef SCHED_IDLE - if (unlikely(sched_setscheduler(0, SCHED_IDLE, &param) == -1)) -#endif - sched_setscheduler(0, SCHED_BATCH, &param); -#endif -} - -static inline void affine_to_cpu(int id, int cpu) -{ - cpu_set_t set; - - CPU_ZERO(&set); - CPU_SET(cpu, &set); - sched_setaffinity(0, sizeof(&set), &set); - applog(LOG_INFO, "Binding cpu mining thread %d to cpu %d", id, cpu); -} -#else -static inline void drop_policy(void) -{ -} - -static inline void affine_to_cpu(int __maybe_unused id, int __maybe_unused cpu) -{ -} -#endif - - - -/* TODO: resolve externals */ -extern char *set_int_range(const char *arg, int *i, int min, int max); -extern int dev_from_id(int thr_id); - - -/* chipset-optimized hash functions */ -extern bool ScanHash_4WaySSE2(struct thr_info*, const unsigned char *pmidstate, - unsigned char *pdata, unsigned char *phash1, unsigned char *phash, - const unsigned char *ptarget, - uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce); - -extern bool ScanHash_altivec_4way(struct thr_info*, const unsigned char *pmidstate, - unsigned char *pdata, - unsigned char *phash1, unsigned char *phash, - const unsigned char *ptarget, - uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce); - -extern bool scanhash_via(struct thr_info*, const unsigned char *pmidstate, - unsigned char *pdata, - unsigned char *phash1, unsigned char *phash, - const unsigned char *target, - uint32_t max_nonce, uint32_t *last_nonce, uint32_t n); - -extern bool scanhash_c(struct
thr_info*, const unsigned char *midstate, unsigned char *data, - unsigned char *hash1, unsigned char *hash, - const unsigned char *target, - uint32_t max_nonce, uint32_t *last_nonce, uint32_t n); - -extern bool scanhash_cryptopp(struct thr_info*, const unsigned char *midstate,unsigned char *data, - unsigned char *hash1, unsigned char *hash, - const unsigned char *target, - uint32_t max_nonce, uint32_t *last_nonce, uint32_t n); - -extern bool scanhash_asm32(struct thr_info*, const unsigned char *midstate,unsigned char *data, - unsigned char *hash1, unsigned char *hash, - const unsigned char *target, - uint32_t max_nonce, uint32_t *last_nonce, uint32_t nonce); - -extern bool scanhash_sse2_64(struct thr_info*, const unsigned char *pmidstate, unsigned char *pdata, - unsigned char *phash1, unsigned char *phash, - const unsigned char *ptarget, - uint32_t max_nonce, uint32_t *last_nonce, - uint32_t nonce); - -extern bool scanhash_sse4_64(struct thr_info*, const unsigned char *pmidstate, unsigned char *pdata, - unsigned char *phash1, unsigned char *phash, - const unsigned char *ptarget, - uint32_t max_nonce, uint32_t *last_nonce, - uint32_t nonce); - -extern bool scanhash_sse2_32(struct thr_info*, const unsigned char *pmidstate, unsigned char *pdata, - unsigned char *phash1, unsigned char *phash, - const unsigned char *ptarget, - uint32_t max_nonce, uint32_t *last_nonce, - uint32_t nonce); - -extern bool scanhash_scrypt(struct thr_info *thr, int thr_id, unsigned char *pdata, unsigned char *scratchbuf, - const unsigned char *ptarget, - uint32_t max_nonce, unsigned long *hashes_done); - - - -#ifdef WANT_CPUMINE -static size_t max_name_len = 0; -static char *name_spaces_pad = NULL; -const char *algo_names[] = { - [ALGO_C] = "c", -#ifdef WANT_SSE2_4WAY - [ALGO_4WAY] = "4way", -#endif -#ifdef WANT_VIA_PADLOCK - [ALGO_VIA] = "via", -#endif - [ALGO_CRYPTOPP] = "cryptopp", -#ifdef WANT_CRYPTOPP_ASM32 - [ALGO_CRYPTOPP_ASM32] = "cryptopp_asm32", -#endif -#ifdef WANT_X8632_SSE2 - 
[ALGO_SSE2_32] = "sse2_32", -#endif -#ifdef WANT_X8664_SSE2 - [ALGO_SSE2_64] = "sse2_64", -#endif -#ifdef WANT_X8664_SSE4 - [ALGO_SSE4_64] = "sse4_64", -#endif -#ifdef WANT_ALTIVEC_4WAY - [ALGO_ALTIVEC_4WAY] = "altivec_4way", -#endif -#ifdef WANT_SCRYPT - [ALGO_SCRYPT] = "scrypt", -#endif -}; - -static const sha256_func sha256_funcs[] = { - [ALGO_C] = (sha256_func)scanhash_c, -#ifdef WANT_SSE2_4WAY - [ALGO_4WAY] = (sha256_func)ScanHash_4WaySSE2, -#endif -#ifdef WANT_ALTIVEC_4WAY - [ALGO_ALTIVEC_4WAY] = (sha256_func) ScanHash_altivec_4way, -#endif -#ifdef WANT_VIA_PADLOCK - [ALGO_VIA] = (sha256_func)scanhash_via, -#endif - [ALGO_CRYPTOPP] = (sha256_func)scanhash_cryptopp, -#ifdef WANT_CRYPTOPP_ASM32 - [ALGO_CRYPTOPP_ASM32] = (sha256_func)scanhash_asm32, -#endif -#ifdef WANT_X8632_SSE2 - [ALGO_SSE2_32] = (sha256_func)scanhash_sse2_32, -#endif -#ifdef WANT_X8664_SSE2 - [ALGO_SSE2_64] = (sha256_func)scanhash_sse2_64, -#endif -#ifdef WANT_X8664_SSE4 - [ALGO_SSE4_64] = (sha256_func)scanhash_sse4_64, -#endif -#ifdef WANT_SCRYPT - [ALGO_SCRYPT] = (sha256_func)scanhash_scrypt -#endif -}; -#endif - - - -#ifdef WANT_CPUMINE -#if defined(WANT_X8664_SSE4) && defined(__SSE4_1__) -enum sha256_algos opt_algo = ALGO_SSE4_64; -#elif defined(WANT_X8664_SSE2) && defined(__SSE2__) -enum sha256_algos opt_algo = ALGO_SSE2_64; -#elif defined(WANT_X8632_SSE2) && defined(__SSE2__) -enum sha256_algos opt_algo = ALGO_SSE2_32; -#else -enum sha256_algos opt_algo = ALGO_C; -#endif -bool opt_usecpu = false; -static int cpur_thr_id; -static bool forced_n_threads; -#endif - - - - -#ifdef WANT_CPUMINE -// Algo benchmark, crash-prone, system independent stage -double bench_algo_stage3( - enum sha256_algos algo -) -{ - // Use a random work block pulled from a pool - static uint8_t bench_block[] = { CGMINER_BENCHMARK_BLOCK }; - struct work work __attribute__((aligned(128))); - unsigned char hash1[64]; - - size_t bench_size = sizeof(work); - size_t work_size = sizeof(bench_block); - size_t min_size = 
(work_size < bench_size ? work_size : bench_size); - memset(&work, 0, sizeof(work)); - memcpy(&work, &bench_block, min_size); - - struct thr_info dummy = {0}; - - struct timeval end; - struct timeval start; - uint32_t max_nonce = (1<<22); - uint32_t last_nonce = 0; - - hex2bin(hash1, "00000000000000000000000000000000000000000000000000000000000000000000008000000000000000000000000000000000000000000000000000010000", 64); - - gettimeofday(&start, 0); - { - sha256_func func = sha256_funcs[algo]; - (*func)( - &dummy, - work.midstate, - work.data, - hash1, - work.hash, - work.target, - max_nonce, - &last_nonce, - work.blk.nonce - ); - } - gettimeofday(&end, 0); - - uint64_t usec_end = ((uint64_t)end.tv_sec)*1000*1000 + end.tv_usec; - uint64_t usec_start = ((uint64_t)start.tv_sec)*1000*1000 + start.tv_usec; - uint64_t usec_elapsed = usec_end - usec_start; - - double rate = -1.0; - if (0drv = &cpu_drv; - cgpu->deven = DEV_ENABLED; - cgpu->threads = 1; - cgpu->kname = algo_names[opt_algo]; - if (opt_scrypt) - cgpu->drv->max_diff = 0xffffffff; - add_cgpu(cgpu); - } -} - -static void reinit_cpu_device(struct cgpu_info *cpu) -{ - tq_push(control_thr[cpur_thr_id].q, cpu); -} - -static bool cpu_thread_prepare(struct thr_info *thr) -{ - thread_reportin(thr); - - return true; -} - -static uint64_t cpu_can_limit_work(struct thr_info __maybe_unused *thr) -{ - return 0xffff; -} - -static bool cpu_thread_init(struct thr_info *thr) -{ - const int thr_id = thr->id; - - /* Set worker threads to nice 19 and then preferentially to SCHED_IDLE - * and if that fails, then SCHED_BATCH. 
No need for this to be an - * error if it fails */ - setpriority(PRIO_PROCESS, 0, 19); - drop_policy(); - /* Cpu affinity only makes sense if the number of threads is a multiple - * of the number of CPUs */ - if (!(opt_n_threads % num_processors)) - affine_to_cpu(dev_from_id(thr_id), dev_from_id(thr_id) % num_processors); - return true; -} - -static int64_t cpu_scanhash(struct thr_info *thr, struct work *work, int64_t max_nonce) -{ - const int thr_id = thr->id; - unsigned char hash1[64]; - uint32_t first_nonce = work->blk.nonce; - uint32_t last_nonce; - bool rc; - - hex2bin(hash1, "00000000000000000000000000000000000000000000000000000000000000000000008000000000000000000000000000000000000000000000000000010000", 64); -CPUSearch: - last_nonce = first_nonce; - rc = false; - - /* scan nonces for a proof-of-work hash */ - { - sha256_func func = sha256_funcs[opt_algo]; - rc = (*func)( - thr, - work->midstate, - work->data, - hash1, - work->hash, - work->target, - max_nonce, - &last_nonce, - work->blk.nonce - ); - } - - /* if nonce found, submit work */ - if (unlikely(rc)) { - applog(LOG_DEBUG, "CPU %d found something?", dev_from_id(thr_id)); - submit_nonce(thr, work, last_nonce); - work->blk.nonce = last_nonce + 1; - goto CPUSearch; - } - else - if (unlikely(last_nonce == first_nonce)) - return 0; - - work->blk.nonce = last_nonce + 1; - return last_nonce - first_nonce + 1; -} - -struct device_drv cpu_drv = { - .drv_id = DRIVER_CPU, - .dname = "cpu", - .name = "CPU", - .drv_detect = cpu_detect, - .reinit_device = reinit_cpu_device, - .thread_prepare = cpu_thread_prepare, - .can_limit_work = cpu_can_limit_work, - .thread_init = cpu_thread_init, - .scanhash = cpu_scanhash, -}; -#endif - - - diff --git a/driver-cpu.h b/driver-cpu.h deleted file mode 100644 index dd4bcb86..00000000 --- a/driver-cpu.h +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef __DEVICE_CPU_H__ -#define __DEVICE_CPU_H__ - -#include "miner.h" - -#include "config.h" -#include - -#ifndef OPT_SHOW_LEN -#define 
OPT_SHOW_LEN 80 -#endif - -#ifdef __SSE2__ -#define WANT_SSE2_4WAY 1 -#endif - -#ifdef __ALTIVEC__ -#define WANT_ALTIVEC_4WAY 1 -#endif - -#if defined(__i386__) && defined(HAS_YASM) && defined(__SSE2__) -#define WANT_X8632_SSE2 1 -#endif - -#if (defined(__i386__) || defined(__x86_64__)) && !defined(__APPLE__) -#define WANT_VIA_PADLOCK 1 -#endif - -#if defined(__x86_64__) && defined(HAS_YASM) -#define WANT_X8664_SSE2 1 -#endif - -#if defined(__x86_64__) && defined(HAS_YASM) && defined(__SSE4_1__) -#define WANT_X8664_SSE4 1 -#endif - -#ifdef USE_SCRYPT -#define WANT_SCRYPT -#endif - -enum sha256_algos { - ALGO_C, /* plain C */ - ALGO_4WAY, /* parallel SSE2 */ - ALGO_VIA, /* VIA padlock */ - ALGO_CRYPTOPP, /* Crypto++ (C) */ - ALGO_CRYPTOPP_ASM32, /* Crypto++ 32-bit assembly */ - ALGO_SSE2_32, /* SSE2 for x86_32 */ - ALGO_SSE2_64, /* SSE2 for x86_64 */ - ALGO_SSE4_64, /* SSE4 for x86_64 */ - ALGO_ALTIVEC_4WAY, /* parallel Altivec */ - ALGO_SCRYPT, /* scrypt */ -}; - -extern const char *algo_names[]; -extern bool opt_usecpu; -extern struct device_drv cpu_drv; - -extern char *set_algo(const char *arg, enum sha256_algos *algo); -extern void show_algo(char buf[OPT_SHOW_LEN], const enum sha256_algos *algo); -extern char *force_nthreads_int(const char *arg, int *i); -extern void init_max_name_len(); -extern double bench_algo_stage3(enum sha256_algos algo); -extern void set_scrypt_algo(enum sha256_algos *algo); - -#endif /* __DEVICE_CPU_H__ */ diff --git a/miner.h b/miner.h index 49f99c94..ab753b6d 100644 --- a/miner.h +++ b/miner.h @@ -210,7 +210,6 @@ enum drv_driver { DRIVER_BITFORCE, DRIVER_MODMINER, DRIVER_ZTEX, - DRIVER_CPU, DRIVER_BFLSC, DRIVER_AVALON, DRIVER_MAX @@ -927,7 +926,6 @@ extern bool hotplug_mode; extern int hotplug_time; extern struct list_head scan_devices; extern int nDevs; -extern int opt_n_threads; extern int num_processors; extern int hw_errors; extern bool use_syslog; @@ -943,13 +941,10 @@ extern bool opt_scrypt; #endif extern double total_secs; 
extern int mining_threads; -extern struct cgpu_info *cpus; extern int total_devices; extern struct cgpu_info **devices; extern int total_pools; extern struct pool **pools; -extern const char *algo_names[]; -extern enum sha256_algos opt_algo; extern struct strategies strategies[]; extern enum pool_strategy pool_strategy; extern int opt_rotate_period; diff --git a/sha256_4way.c b/sha256_4way.c deleted file mode 100644 index c99ba62e..00000000 --- a/sha256_4way.c +++ /dev/null @@ -1,488 +0,0 @@ -// Copyright (c) 2010 Satoshi Nakamoto -// Distributed under the MIT/X11 software license, see the accompanying -// file license.txt or http://www.opensource.org/licenses/mit-license.php. - -// tcatm's 4-way 128-bit SSE2 SHA-256 - -#include "driver-cpu.h" - -#ifdef WANT_SSE2_4WAY - -#include -#include - -#include -#include -#include - -#define NPAR 32 - -static void DoubleBlockSHA256(const void* pin, void* pout, const void* pinit, unsigned int hash[8][NPAR], const void* init2); - -static const unsigned int sha256_consts[] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, /* 0 */ - 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, /* 8 */ - 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, /* 16 */ - 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, /* 24 */ - 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, /* 32 */ - 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, /* 40 */ - 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, /* 48 */ - 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, /* 56 */ - 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -}; - - -static inline __m128i Ch(const __m128i b, const __m128i c, const __m128i d) { - return 
_mm_xor_si128(_mm_and_si128(b,c),_mm_andnot_si128(b,d)); -} - -static inline __m128i Maj(const __m128i b, const __m128i c, const __m128i d) { - return _mm_xor_si128(_mm_xor_si128(_mm_and_si128(b,c),_mm_and_si128(b,d)),_mm_and_si128(c,d)); -} - -static inline __m128i ROTR(__m128i x, const int n) { - return _mm_or_si128(_mm_srli_epi32(x, n),_mm_slli_epi32(x, 32 - n)); -} - -static inline __m128i SHR(__m128i x, const int n) { - return _mm_srli_epi32(x, n); -} - -/* SHA256 Functions */ -#define BIGSIGMA0_256(x) (_mm_xor_si128(_mm_xor_si128(ROTR((x), 2),ROTR((x), 13)),ROTR((x), 22))) -#define BIGSIGMA1_256(x) (_mm_xor_si128(_mm_xor_si128(ROTR((x), 6),ROTR((x), 11)),ROTR((x), 25))) - - -#define SIGMA0_256(x) (_mm_xor_si128(_mm_xor_si128(ROTR((x), 7),ROTR((x), 18)), SHR((x), 3 ))) -#define SIGMA1_256(x) (_mm_xor_si128(_mm_xor_si128(ROTR((x),17),ROTR((x), 19)), SHR((x), 10))) - -static inline unsigned int store32(const __m128i x, int i) { - union { unsigned int ret[4]; __m128i x; } box; - box.x = x; - return box.ret[i]; -} - -static inline void store_epi32(const __m128i x, unsigned int *x0, unsigned int *x1, unsigned int *x2, unsigned int *x3) { - union { unsigned int ret[4]; __m128i x; } box; - box.x = x; - *x0 = box.ret[3]; *x1 = box.ret[2]; *x2 = box.ret[1]; *x3 = box.ret[0]; -} - -#define add4(x0, x1, x2, x3) _mm_add_epi32(_mm_add_epi32(x0, x1),_mm_add_epi32( x2,x3)) -#define add5(x0, x1, x2, x3, x4) _mm_add_epi32(add4(x0, x1, x2, x3), x4) - -#define SHA256ROUND(a, b, c, d, e, f, g, h, i, w) \ - T1 = add5(h, BIGSIGMA1_256(e), Ch(e, f, g), _mm_set1_epi32(sha256_consts[i]), w); \ -d = _mm_add_epi32(d, T1); \ -h = _mm_add_epi32(T1, _mm_add_epi32(BIGSIGMA0_256(a), Maj(a, b, c))); - -static inline void dumpreg(__m128i x, char *msg) { - union { unsigned int ret[4]; __m128i x; } box; - box.x = x ; - printf("%s %08x %08x %08x %08x\n", msg, box.ret[0], box.ret[1], box.ret[2], box.ret[3]); -} - -#if 1 -#define dumpstate(i) printf("%s: %08x %08x %08x %08x %08x %08x %08x %08x 
%08x\n", \ - __func__, store32(w0, i), store32(a, i), store32(b, i), store32(c, i), store32(d, i), store32(e, i), store32(f, i), store32(g, i), store32(h, i)); -#else -#define dumpstate() -#endif - -static const unsigned int pSHA256InitState[8] = -{0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19}; - - -bool ScanHash_4WaySSE2(struct thr_info*thr, const unsigned char *pmidstate, - unsigned char *pdata, - unsigned char *phash1, unsigned char *phash, - const unsigned char *ptarget, - uint32_t max_nonce, uint32_t *last_nonce, - uint32_t nonce) -{ - unsigned int *nNonce_p = (unsigned int*)(pdata + 76); - - pdata += 64; - - for (;;) - { - unsigned int thash[9][NPAR] __attribute__((aligned(128))); - int j; - - nonce += NPAR; - *nNonce_p = nonce; - - DoubleBlockSHA256(pdata, phash1, pmidstate, thash, pSHA256InitState); - - for (j = 0; j < NPAR; j++) - { - if (unlikely(thash[7][j] == 0)) - { - int i; - - for (i = 0; i < 32/4; i++) - ((unsigned int*)phash)[i] = thash[i][j]; - - if (fulltest(phash, ptarget)) { - nonce += j; - *last_nonce = nonce; - *nNonce_p = nonce; - return true; - } - } - } - - if ((nonce >= max_nonce) || thr->work_restart) - { - *last_nonce = nonce; - return false; - } - } -} - - -static void DoubleBlockSHA256(const void* pin, void* pad, const void *pre, unsigned int thash[9][NPAR], const void *init) -{ - unsigned int* In = (unsigned int*)pin; - unsigned int* Pad = (unsigned int*)pad; - unsigned int* hPre = (unsigned int*)pre; - unsigned int* hInit = (unsigned int*)init; - unsigned int /* i, j, */ k; - - /* vectors used in calculation */ - __m128i w0, w1, w2, w3, w4, w5, w6, w7; - __m128i w8, w9, w10, w11, w12, w13, w14, w15; - __m128i T1; - __m128i a, b, c, d, e, f, g, h; - __m128i nonce, preNonce; - - /* nonce offset for vector */ - __m128i offset = _mm_set_epi32(0x00000003, 0x00000002, 0x00000001, 0x00000000); - - - preNonce = _mm_add_epi32(_mm_set1_epi32(In[3]), offset); - - for(k = 0; k -#include - 
-//#include -#include -#include - -#define NPAR 32 - -static void DoubleBlockSHA256(const void* pin, void* pout, const void* pinit, unsigned int hash[8][NPAR], const void* init2); - -static const unsigned int sha256_consts[] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, /* 0 */ - 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, /* 8 */ - 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, /* 16 */ - 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, /* 24 */ - 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, /* 32 */ - 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, /* 40 */ - 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, /* 48 */ - 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, /* 56 */ - 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -}; - - -static inline vector unsigned int Ch(const vector unsigned int b, const vector unsigned int c, const vector unsigned int d) { - return vec_sel(d,c,b); -} - -static inline vector unsigned int Maj(const vector unsigned int b, const vector unsigned int c, const vector unsigned int d) { - return vec_sel(b,c, vec_xor(b,d)); -} - -/* RotateRight(x, n) := RotateLeft(x, 32-n) */ -/* SHA256 Functions */ -#define BIGSIGMA0_256(x) (vec_xor(vec_xor(vec_rl((x), (vector unsigned int)(32-2)),vec_rl((x), (vector unsigned int)(32-13))),vec_rl((x), (vector unsigned int)(32-22)))) -#define BIGSIGMA1_256(x) (vec_xor(vec_xor(vec_rl((x), (vector unsigned int)(32-6)),vec_rl((x), (vector unsigned int)(32-11))),vec_rl((x), (vector unsigned int)(32-25)))) - -#define SIGMA0_256(x) (vec_xor(vec_xor(vec_rl((x), (vector unsigned int)(32- 7)),vec_rl((x), (vector unsigned int)(32-18))), vec_sr((x), (vector 
unsigned int)(3 )))) -#define SIGMA1_256(x) (vec_xor(vec_xor(vec_rl((x), (vector unsigned int)(32-17)),vec_rl((x), (vector unsigned int)(32-19))), vec_sr((x), (vector unsigned int)(10)))) - -#define add4(x0, x1, x2, x3) vec_add(vec_add(x0, x1),vec_add( x2,x3)) -#define add5(x0, x1, x2, x3, x4) vec_add(add4(x0, x1, x2, x3), x4) - -#define SHA256ROUND(a, b, c, d, e, f, g, h, i, w) \ - T1 = add5(h, BIGSIGMA1_256(e), Ch(e, f, g), (vector unsigned int)(sha256_consts[i],sha256_consts[i],sha256_consts[i],sha256_consts[i]), w); \ - d = vec_add(d, T1); \ - h = vec_add(T1, vec_add(BIGSIGMA0_256(a), Maj(a, b, c))); - - -static const unsigned int pSHA256InitState[8] = -{0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19}; - - -bool ScanHash_altivec_4way(struct thr_info*thr, const unsigned char *pmidstate, - unsigned char *pdata, - unsigned char *phash1, unsigned char *phash, - const unsigned char *ptarget, - uint32_t max_nonce, uint32_t *last_nonce, - uint32_t nonce) -{ - unsigned int *nNonce_p = (unsigned int*)(pdata + 76); - - pdata += 64; - - for (;;) - { - unsigned int thash[9][NPAR] __attribute__((aligned(128))); - int j; - - *nNonce_p = nonce; - - DoubleBlockSHA256(pdata, phash1, pmidstate, thash, pSHA256InitState); - - for (j = 0; j < NPAR; j++) - { - if (unlikely(thash[7][j] == 0)) - { - int i; - - for (i = 0; i < 32/4; i++) - ((unsigned int*)phash)[i] = thash[i][j]; - - if (fulltest(phash, ptarget)) { - nonce += j; - *last_nonce = nonce; - *nNonce_p = nonce; - return true; - } - } - } - - if ((nonce >= max_nonce) || thr->work_restart) - { - *last_nonce = nonce; - return false; - } - - nonce += NPAR; - } -} - - -static void DoubleBlockSHA256(const void* pin, void* pad, const void *pre, unsigned int thash[9][NPAR], const void *init) -{ - unsigned int* In = (unsigned int*)pin; - unsigned int* Pad = (unsigned int*)pad; - unsigned int* hPre = (unsigned int*)pre; - unsigned int* hInit = (unsigned int*)init; - unsigned int /* i, j, 
*/ k; - - /* vectors used in calculation */ - vector unsigned int w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; - vector unsigned int T1; - vector unsigned int a, b, c, d, e, f, g, h; - vector unsigned int nonce, preNonce; - - /* nonce offset for vector */ - vector unsigned int offset = (vector unsigned int)(0, 1, 2, 3); - - preNonce = vec_add((vector unsigned int)(In[3],In[3],In[3],In[3]), offset); - - for(k = 0; k try if it´s faster to compare the results with the target inside this function */ - } - -} - -#endif /* WANT_ALTIVEC_4WAY */ - diff --git a/sha256_cryptopp.c b/sha256_cryptopp.c deleted file mode 100644 index 607f0c46..00000000 --- a/sha256_cryptopp.c +++ /dev/null @@ -1,609 +0,0 @@ - -#include "config.h" - -#include -#include -#include -#include -#include -#include "miner.h" - -typedef uint32_t word32; - -static word32 rotrFixed(word32 word, unsigned int shift) -{ - return (word >> shift) | (word << (32 - shift)); -} - -#define blk0(i) (W[i] = data[i]) - -static const word32 SHA256_K[64] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, - 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, - 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, - 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, - 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, - 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, - 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, - 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, - 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -}; - -#define blk2(i) (W[i&15]+=s1(W[(i-2)&15])+W[(i-7)&15]+s0(W[(i-15)&15])) - -#define Ch(x,y,z) (z^(x&(y^z))) -#define Maj(x,y,z) (y^((x^y)&(y^z))) - 
-#define a(i) T[(0-i)&7] -#define b(i) T[(1-i)&7] -#define c(i) T[(2-i)&7] -#define d(i) T[(3-i)&7] -#define e(i) T[(4-i)&7] -#define f(i) T[(5-i)&7] -#define g(i) T[(6-i)&7] -#define h(i) T[(7-i)&7] - -#define R(i) h(i)+=S1(e(i))+Ch(e(i),f(i),g(i))+SHA256_K[i+j]+(j?blk2(i):blk0(i));\ - d(i)+=h(i);h(i)+=S0(a(i))+Maj(a(i),b(i),c(i)) - -// for SHA256 -#define S0(x) (rotrFixed(x,2)^rotrFixed(x,13)^rotrFixed(x,22)) -#define S1(x) (rotrFixed(x,6)^rotrFixed(x,11)^rotrFixed(x,25)) -#define s0(x) (rotrFixed(x,7)^rotrFixed(x,18)^(x>>3)) -#define s1(x) (rotrFixed(x,17)^rotrFixed(x,19)^(x>>10)) - -static void SHA256_Transform(word32 *state, const word32 *data) -{ - word32 W[16] = { }; - word32 T[8]; - unsigned int j; - - /* Copy context->state[] to working vars */ - memcpy(T, state, sizeof(T)); - /* 64 operations, partially loop unrolled */ - for (j=0; j<64; j+=16) - { - R( 0); R( 1); R( 2); R( 3); - R( 4); R( 5); R( 6); R( 7); - R( 8); R( 9); R(10); R(11); - R(12); R(13); R(14); R(15); - } - /* Add the working vars back into context.state[] */ - state[0] += a(0); - state[1] += b(0); - state[2] += c(0); - state[3] += d(0); - state[4] += e(0); - state[5] += f(0); - state[6] += g(0); - state[7] += h(0); -} - -static void runhash(void *state, const void *input, const void *init) -{ - memcpy(state, init, 32); - SHA256_Transform(state, input); -} - -/* suspiciously similar to ScanHash* from bitcoin */ -bool scanhash_cryptopp(struct thr_info*thr, const unsigned char *midstate, - unsigned char *data, - unsigned char *hash1, unsigned char *hash, - const unsigned char *target, - uint32_t max_nonce, uint32_t *last_nonce, - uint32_t n) -{ - uint32_t *hash32 = (uint32_t *) hash; - uint32_t *nonce = (uint32_t *)(data + 76); - - data += 64; - - while (1) { - n++; - *nonce = n; - - runhash(hash1, data, midstate); - runhash(hash, hash1, sha256_init_state); - - if (unlikely((hash32[7] == 0) && fulltest(hash, target))) { - *last_nonce = n; - return true; - } - - if ((n >= max_nonce) || 
thr->work_restart) { - *last_nonce = n; - return false; - } - } -} - -#if defined(WANT_CRYPTOPP_ASM32) - -#define CRYPTOPP_FASTCALL -#define CRYPTOPP_BOOL_X86 1 -#define CRYPTOPP_BOOL_X64 0 -#define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0 - -#ifdef CRYPTOPP_GENERATE_X64_MASM - #define AS1(x) x*newline* - #define AS2(x, y) x, y*newline* - #define AS3(x, y, z) x, y, z*newline* - #define ASS(x, y, a, b, c, d) x, y, a*64+b*16+c*4+d*newline* - #define ASL(x) label##x:*newline* - #define ASJ(x, y, z) x label##y*newline* - #define ASC(x, y) x label##y*newline* - #define AS_HEX(y) 0##y##h -#elif defined(_MSC_VER) || defined(__BORLANDC__) - #define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY - #define AS1(x) __asm {x} - #define AS2(x, y) __asm {x, y} - #define AS3(x, y, z) __asm {x, y, z} - #define ASS(x, y, a, b, c, d) __asm {x, y, (a)*64+(b)*16+(c)*4+(d)} - #define ASL(x) __asm {label##x:} - #define ASJ(x, y, z) __asm {x label##y} - #define ASC(x, y) __asm {x label##y} - #define CRYPTOPP_NAKED __declspec(naked) - #define AS_HEX(y) 0x##y -#else - #define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY - // define these in two steps to allow arguments to be expanded - #define GNU_AS1(x) #x ";" - #define GNU_AS2(x, y) #x ", " #y ";" - #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";" - #define GNU_ASL(x) "\n" #x ":" - #define GNU_ASJ(x, y, z) #x " " #y #z ";" - #define AS1(x) GNU_AS1(x) - #define AS2(x, y) GNU_AS2(x, y) - #define AS3(x, y, z) GNU_AS3(x, y, z) - #define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";" - #define ASL(x) GNU_ASL(x) - #define ASJ(x, y, z) GNU_ASJ(x, y, z) - #define ASC(x, y) #x " " #y ";" - #define CRYPTOPP_NAKED - #define AS_HEX(y) 0x##y -#endif - -#define IF0(y) -#define IF1(y) y - -#ifdef CRYPTOPP_GENERATE_X64_MASM -#define ASM_MOD(x, y) ((x) MOD (y)) -#define XMMWORD_PTR XMMWORD PTR -#else -// GNU assembler doesn't seem to have mod operator -#define ASM_MOD(x, y) ((x)-((x)/(y))*(y)) -// GAS 2.15 doesn't support XMMWORD PTR. 
it seems necessary only for MASM -#define XMMWORD_PTR -#endif - -#if CRYPTOPP_BOOL_X86 - #define AS_REG_1 ecx - #define AS_REG_2 edx - #define AS_REG_3 esi - #define AS_REG_4 edi - #define AS_REG_5 eax - #define AS_REG_6 ebx - #define AS_REG_7 ebp - #define AS_REG_1d ecx - #define AS_REG_2d edx - #define AS_REG_3d esi - #define AS_REG_4d edi - #define AS_REG_5d eax - #define AS_REG_6d ebx - #define AS_REG_7d ebp - #define WORD_SZ 4 - #define WORD_REG(x) e##x - #define WORD_PTR DWORD PTR - #define AS_PUSH_IF86(x) AS1(push e##x) - #define AS_POP_IF86(x) AS1(pop e##x) - #define AS_JCXZ jecxz -#elif CRYPTOPP_BOOL_X64 - #ifdef CRYPTOPP_GENERATE_X64_MASM - #define AS_REG_1 rcx - #define AS_REG_2 rdx - #define AS_REG_3 r8 - #define AS_REG_4 r9 - #define AS_REG_5 rax - #define AS_REG_6 r10 - #define AS_REG_7 r11 - #define AS_REG_1d ecx - #define AS_REG_2d edx - #define AS_REG_3d r8d - #define AS_REG_4d r9d - #define AS_REG_5d eax - #define AS_REG_6d r10d - #define AS_REG_7d r11d - #else - #define AS_REG_1 rdi - #define AS_REG_2 rsi - #define AS_REG_3 rdx - #define AS_REG_4 rcx - #define AS_REG_5 r8 - #define AS_REG_6 r9 - #define AS_REG_7 r10 - #define AS_REG_1d edi - #define AS_REG_2d esi - #define AS_REG_3d edx - #define AS_REG_4d ecx - #define AS_REG_5d r8d - #define AS_REG_6d r9d - #define AS_REG_7d r10d - #endif - #define WORD_SZ 8 - #define WORD_REG(x) r##x - #define WORD_PTR QWORD PTR - #define AS_PUSH_IF86(x) - #define AS_POP_IF86(x) - #define AS_JCXZ jrcxz -#endif - -static void CRYPTOPP_FASTCALL X86_SHA256_HashBlocks(word32 *state, const word32 *data, size_t len -#if defined(_MSC_VER) && (_MSC_VER == 1200) - , ... 
// VC60 workaround: prevent VC 6 from inlining this function -#endif - ) -{ -#if defined(_MSC_VER) && (_MSC_VER == 1200) - AS2(mov ecx, [state]) - AS2(mov edx, [data]) -#endif - - #define LOCALS_SIZE 8*4 + 16*4 + 4*WORD_SZ - #define H(i) [BASE+ASM_MOD(1024+7-(i),8)*4] - #define G(i) H(i+1) - #define F(i) H(i+2) - #define E(i) H(i+3) - #define D(i) H(i+4) - #define C(i) H(i+5) - #define B(i) H(i+6) - #define A(i) H(i+7) - #define Wt(i) BASE+8*4+ASM_MOD(1024+15-(i),16)*4 - #define Wt_2(i) Wt((i)-2) - #define Wt_15(i) Wt((i)-15) - #define Wt_7(i) Wt((i)-7) - #define K_END [BASE+8*4+16*4+0*WORD_SZ] - #define STATE_SAVE [BASE+8*4+16*4+1*WORD_SZ] - #define DATA_SAVE [BASE+8*4+16*4+2*WORD_SZ] - #define DATA_END [BASE+8*4+16*4+3*WORD_SZ] - #define Kt(i) WORD_REG(si)+(i)*4 -#if CRYPTOPP_BOOL_X86 - #define BASE esp+4 -#elif defined(__GNUC__) - #define BASE r8 -#else - #define BASE rsp -#endif - -#define RA0(i, edx, edi) \ - AS2( add edx, [Kt(i)] )\ - AS2( add edx, [Wt(i)] )\ - AS2( add edx, H(i) )\ - -#define RA1(i, edx, edi) - -#define RB0(i, edx, edi) - -#define RB1(i, edx, edi) \ - AS2( mov AS_REG_7d, [Wt_2(i)] )\ - AS2( mov edi, [Wt_15(i)])\ - AS2( mov ebx, AS_REG_7d )\ - AS2( shr AS_REG_7d, 10 )\ - AS2( ror ebx, 17 )\ - AS2( xor AS_REG_7d, ebx )\ - AS2( ror ebx, 2 )\ - AS2( xor ebx, AS_REG_7d )/* s1(W_t-2) */\ - AS2( add ebx, [Wt_7(i)])\ - AS2( mov AS_REG_7d, edi )\ - AS2( shr AS_REG_7d, 3 )\ - AS2( ror edi, 7 )\ - AS2( add ebx, [Wt(i)])/* s1(W_t-2) + W_t-7 + W_t-16 */\ - AS2( xor AS_REG_7d, edi )\ - AS2( add edx, [Kt(i)])\ - AS2( ror edi, 11 )\ - AS2( add edx, H(i) )\ - AS2( xor AS_REG_7d, edi )/* s0(W_t-15) */\ - AS2( add AS_REG_7d, ebx )/* W_t = s1(W_t-2) + W_t-7 + s0(W_t-15) W_t-16*/\ - AS2( mov [Wt(i)], AS_REG_7d)\ - AS2( add edx, AS_REG_7d )\ - -#define ROUND(i, r, eax, ecx, edi, edx)\ - /* in: edi = E */\ - /* unused: eax, ecx, temp: ebx, AS_REG_7d, out: edx = T1 */\ - AS2( mov edx, F(i) )\ - AS2( xor edx, G(i) )\ - AS2( and edx, edi )\ - AS2( xor edx, G(i) )/* 
Ch(E,F,G) = (G^(E&(F^G))) */\ - AS2( mov AS_REG_7d, edi )\ - AS2( ror edi, 6 )\ - AS2( ror AS_REG_7d, 25 )\ - RA##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\ - AS2( xor AS_REG_7d, edi )\ - AS2( ror edi, 5 )\ - AS2( xor AS_REG_7d, edi )/* S1(E) */\ - AS2( add edx, AS_REG_7d )/* T1 = S1(E) + Ch(E,F,G) + H + Wt + Kt */\ - RB##r(i, edx, edi )/* H + Wt + Kt + Ch(E,F,G) */\ - /* in: ecx = A, eax = B^C, edx = T1 */\ - /* unused: edx, temp: ebx, AS_REG_7d, out: eax = A, ecx = B^C, edx = E */\ - AS2( mov ebx, ecx )\ - AS2( xor ecx, B(i) )/* A^B */\ - AS2( and eax, ecx )\ - AS2( xor eax, B(i) )/* Maj(A,B,C) = B^((A^B)&(B^C) */\ - AS2( mov AS_REG_7d, ebx )\ - AS2( ror ebx, 2 )\ - AS2( add eax, edx )/* T1 + Maj(A,B,C) */\ - AS2( add edx, D(i) )\ - AS2( mov D(i), edx )\ - AS2( ror AS_REG_7d, 22 )\ - AS2( xor AS_REG_7d, ebx )\ - AS2( ror ebx, 11 )\ - AS2( xor AS_REG_7d, ebx )\ - AS2( add eax, AS_REG_7d )/* T1 + S0(A) + Maj(A,B,C) */\ - AS2( mov H(i), eax )\ - -#define SWAP_COPY(i) \ - AS2( mov WORD_REG(bx), [WORD_REG(dx)+i*WORD_SZ])\ - AS1( bswap WORD_REG(bx))\ - AS2( mov [Wt(i*(1+CRYPTOPP_BOOL_X64)+CRYPTOPP_BOOL_X64)], WORD_REG(bx)) - -#if defined(__GNUC__) - #if CRYPTOPP_BOOL_X64 - FixedSizeAlignedSecBlock workspace; - #endif - __asm__ __volatile__ - ( - #if CRYPTOPP_BOOL_X64 - "lea %4, %%r8;" - #endif - ".intel_syntax noprefix;" -#elif defined(CRYPTOPP_GENERATE_X64_MASM) - ALIGN 8 - X86_SHA256_HashBlocks PROC FRAME - rex_push_reg rsi - push_reg rdi - push_reg rbx - push_reg rbp - alloc_stack(LOCALS_SIZE+8) - .endprolog - mov rdi, r8 - lea rsi, [?SHA256_K@CryptoPP@@3QBIB + 48*4] -#endif - -#if CRYPTOPP_BOOL_X86 - #ifndef __GNUC__ - AS2( mov edi, [len]) - AS2( lea WORD_REG(si), [SHA256_K+48*4]) - #endif - #if !defined(_MSC_VER) || (_MSC_VER < 1400) - AS_PUSH_IF86(bx) - #endif - - AS_PUSH_IF86(bp) - AS2( mov ebx, esp) - AS2( and esp, -16) - AS2( sub WORD_REG(sp), LOCALS_SIZE) - AS_PUSH_IF86(bx) -#endif - AS2( mov STATE_SAVE, WORD_REG(cx)) - AS2( mov DATA_SAVE, WORD_REG(dx)) - 
AS2( lea WORD_REG(ax), [WORD_REG(di) + WORD_REG(dx)]) - AS2( mov DATA_END, WORD_REG(ax)) - AS2( mov K_END, WORD_REG(si)) - -#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE -#if CRYPTOPP_BOOL_X86 - AS2( test edi, 1) - ASJ( jnz, 2, f) - AS1( dec DWORD PTR K_END) -#endif - AS2( movdqa xmm0, XMMWORD_PTR [WORD_REG(cx)+0*16]) - AS2( movdqa xmm1, XMMWORD_PTR [WORD_REG(cx)+1*16]) -#endif - -#if CRYPTOPP_BOOL_X86 -#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE - ASJ( jmp, 0, f) -#endif - ASL(2) // non-SSE2 - AS2( mov esi, ecx) - AS2( lea edi, A(0)) - AS2( mov ecx, 8) - AS1( rep movsd) - AS2( mov esi, K_END) - ASJ( jmp, 3, f) -#endif - -#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE - ASL(0) - AS2( movdqa E(0), xmm1) - AS2( movdqa A(0), xmm0) -#endif -#if CRYPTOPP_BOOL_X86 - ASL(3) -#endif - AS2( sub WORD_REG(si), 48*4) - SWAP_COPY(0) SWAP_COPY(1) SWAP_COPY(2) SWAP_COPY(3) - SWAP_COPY(4) SWAP_COPY(5) SWAP_COPY(6) SWAP_COPY(7) -#if CRYPTOPP_BOOL_X86 - SWAP_COPY(8) SWAP_COPY(9) SWAP_COPY(10) SWAP_COPY(11) - SWAP_COPY(12) SWAP_COPY(13) SWAP_COPY(14) SWAP_COPY(15) -#endif - AS2( mov edi, E(0)) // E - AS2( mov eax, B(0)) // B - AS2( xor eax, C(0)) // B^C - AS2( mov ecx, A(0)) // A - - ROUND(0, 0, eax, ecx, edi, edx) - ROUND(1, 0, ecx, eax, edx, edi) - ROUND(2, 0, eax, ecx, edi, edx) - ROUND(3, 0, ecx, eax, edx, edi) - ROUND(4, 0, eax, ecx, edi, edx) - ROUND(5, 0, ecx, eax, edx, edi) - ROUND(6, 0, eax, ecx, edi, edx) - ROUND(7, 0, ecx, eax, edx, edi) - ROUND(8, 0, eax, ecx, edi, edx) - ROUND(9, 0, ecx, eax, edx, edi) - ROUND(10, 0, eax, ecx, edi, edx) - ROUND(11, 0, ecx, eax, edx, edi) - ROUND(12, 0, eax, ecx, edi, edx) - ROUND(13, 0, ecx, eax, edx, edi) - ROUND(14, 0, eax, ecx, edi, edx) - ROUND(15, 0, ecx, eax, edx, edi) - - ASL(1) - AS2(add WORD_REG(si), 4*16) - ROUND(0, 1, eax, ecx, edi, edx) - ROUND(1, 1, ecx, eax, edx, edi) - ROUND(2, 1, eax, ecx, edi, edx) - ROUND(3, 1, ecx, eax, edx, edi) - ROUND(4, 1, eax, ecx, edi, edx) - ROUND(5, 1, ecx, eax, edx, edi) - ROUND(6, 1, eax, ecx, edi, edx) - ROUND(7, 1, 
ecx, eax, edx, edi) - ROUND(8, 1, eax, ecx, edi, edx) - ROUND(9, 1, ecx, eax, edx, edi) - ROUND(10, 1, eax, ecx, edi, edx) - ROUND(11, 1, ecx, eax, edx, edi) - ROUND(12, 1, eax, ecx, edi, edx) - ROUND(13, 1, ecx, eax, edx, edi) - ROUND(14, 1, eax, ecx, edi, edx) - ROUND(15, 1, ecx, eax, edx, edi) - AS2( cmp WORD_REG(si), K_END) - ASJ( jb, 1, b) - - AS2( mov WORD_REG(dx), DATA_SAVE) - AS2( add WORD_REG(dx), 64) - AS2( mov AS_REG_7, STATE_SAVE) - AS2( mov DATA_SAVE, WORD_REG(dx)) - -#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE -#if CRYPTOPP_BOOL_X86 - AS2( test DWORD PTR K_END, 1) - ASJ( jz, 4, f) -#endif - AS2( movdqa xmm1, XMMWORD_PTR [AS_REG_7+1*16]) - AS2( movdqa xmm0, XMMWORD_PTR [AS_REG_7+0*16]) - AS2( paddd xmm1, E(0)) - AS2( paddd xmm0, A(0)) - AS2( movdqa [AS_REG_7+1*16], xmm1) - AS2( movdqa [AS_REG_7+0*16], xmm0) - AS2( cmp WORD_REG(dx), DATA_END) - ASJ( jb, 0, b) -#endif - -#if CRYPTOPP_BOOL_X86 -#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE - ASJ( jmp, 5, f) - ASL(4) // non-SSE2 -#endif - AS2( add [AS_REG_7+0*4], ecx) // A - AS2( add [AS_REG_7+4*4], edi) // E - AS2( mov eax, B(0)) - AS2( mov ebx, C(0)) - AS2( mov ecx, D(0)) - AS2( add [AS_REG_7+1*4], eax) - AS2( add [AS_REG_7+2*4], ebx) - AS2( add [AS_REG_7+3*4], ecx) - AS2( mov eax, F(0)) - AS2( mov ebx, G(0)) - AS2( mov ecx, H(0)) - AS2( add [AS_REG_7+5*4], eax) - AS2( add [AS_REG_7+6*4], ebx) - AS2( add [AS_REG_7+7*4], ecx) - AS2( mov ecx, AS_REG_7d) - AS2( cmp WORD_REG(dx), DATA_END) - ASJ( jb, 2, b) -#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE - ASL(5) -#endif -#endif - - AS_POP_IF86(sp) - AS_POP_IF86(bp) - #if !defined(_MSC_VER) || (_MSC_VER < 1400) - AS_POP_IF86(bx) - #endif - -#ifdef CRYPTOPP_GENERATE_X64_MASM - add rsp, LOCALS_SIZE+8 - pop rbp - pop rbx - pop rdi - pop rsi - ret - X86_SHA256_HashBlocks ENDP -#endif - -#ifdef __GNUC__ - ".att_syntax prefix;" - : - : "c" (state), "d" (data), "S" (SHA256_K+48), "D" (len) - #if CRYPTOPP_BOOL_X64 - , "m" (workspace[0]) - #endif - : "memory", "cc", "%eax" - #if 
CRYPTOPP_BOOL_X64 - , "%rbx", "%r8", "%r10" - #endif - ); -#endif -} - -static inline bool HasSSE2(void) { return false; } - -static void SHA256_Transform32(word32 *state, const word32 *data) -{ - word32 W[16]; - int i; - - for (i = 0; i < 16; i++) - W[i] = swab32(((word32 *)(data))[i]); - - X86_SHA256_HashBlocks(state, W, 16 * 4); -} - -static void runhash32(void *state, const void *input, const void *init) -{ - memcpy(state, init, 32); - SHA256_Transform32(state, input); -} - -/* suspiciously similar to ScanHash* from bitcoin */ -bool scanhash_asm32(struct thr_info*thr, const unsigned char *midstate, - unsigned char *data, - unsigned char *hash1, unsigned char *hash, - const unsigned char *target, - uint32_t max_nonce, uint32_t *last_nonce, - uint32_t n) -{ - uint32_t *hash32 = (uint32_t *) hash; - uint32_t *nonce = (uint32_t *)(data + 76); - - data += 64; - - while (1) { - n++; - *nonce = n; - - runhash32(hash1, data, midstate); - runhash32(hash, hash1, sha256_init_state); - - if (unlikely((hash32[7] == 0) && fulltest(hash, target))) { - *last_nonce = n; - return true; - } - - if ((n >= max_nonce) || thr->work_restart) { - *last_nonce = n; - return false; - } - } -} - -#endif // #if defined(WANT_CRYPTOPP_ASM32) diff --git a/sha256_generic.c b/sha256_generic.c deleted file mode 100644 index 8badf294..00000000 --- a/sha256_generic.c +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Cryptographic API. - * - * SHA-256, as specified in - * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf - * - * SHA-256 code by Jean-Luc Cooke . - * - * Copyright (c) Jean-Luc Cooke - * Copyright (c) Andrew McDonald - * Copyright (c) 2002 James Morris - * SHA224 Support Copyright 2007 Intel Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- * - */ - -#include "config.h" - -#include -#include -#include -#include -#include "miner.h" - -typedef uint32_t u32; -typedef uint8_t u8; - -static inline u32 ror32(u32 word, unsigned int shift) -{ - return (word >> shift) | (word << (32 - shift)); -} - -static inline u32 Ch(u32 x, u32 y, u32 z) -{ - return z ^ (x & (y ^ z)); -} - -static inline u32 Maj(u32 x, u32 y, u32 z) -{ - return (x & y) | (z & (x | y)); -} - -#define e0(x) (ror32(x, 2) ^ ror32(x,13) ^ ror32(x,22)) -#define e1(x) (ror32(x, 6) ^ ror32(x,11) ^ ror32(x,25)) -#define s0(x) (ror32(x, 7) ^ ror32(x,18) ^ (x >> 3)) -#define s1(x) (ror32(x,17) ^ ror32(x,19) ^ (x >> 10)) - -static inline void LOAD_OP(int I, u32 *W, const u8 *input) -{ - /* byteswap is commented out, because bitcoin input - * is already big-endian - */ - W[I] = /* ntohl */ ( ((u32*)(input))[I] ); -} - -static inline void BLEND_OP(int I, u32 *W) -{ - W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16]; -} - -static void sha256_transform(u32 *state, const u8 *input) -{ - u32 a, b, c, d, e, f, g, h, t1, t2; - u32 W[64]; - int i; - - /* load the input */ - for (i = 0; i < 16; i++) - LOAD_OP(i, W, input); - - /* now blend */ - for (i = 16; i < 64; i++) - BLEND_OP(i, W); - - /* load the state into our registers */ - a=state[0]; b=state[1]; c=state[2]; d=state[3]; - e=state[4]; f=state[5]; g=state[6]; h=state[7]; - - /* now iterate */ - t1 = h + e1(e) + Ch(e,f,g) + 0x428a2f98 + W[ 0]; - t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2; - t1 = g + e1(d) + Ch(d,e,f) + 0x71374491 + W[ 1]; - t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2; - t1 = f + e1(c) + Ch(c,d,e) + 0xb5c0fbcf + W[ 2]; - t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2; - t1 = e + e1(b) + Ch(b,c,d) + 0xe9b5dba5 + W[ 3]; - t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2; - t1 = d + e1(a) + Ch(a,b,c) + 0x3956c25b + W[ 4]; - t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2; - t1 = c + e1(h) + Ch(h,a,b) + 0x59f111f1 + W[ 5]; - t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2; - t1 = b + e1(g) + Ch(g,h,a) + 0x923f82a4 + W[ 6]; - t2 
= e0(c) + Maj(c,d,e); f+=t1; b=t1+t2; - t1 = a + e1(f) + Ch(f,g,h) + 0xab1c5ed5 + W[ 7]; - t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2; - - t1 = h + e1(e) + Ch(e,f,g) + 0xd807aa98 + W[ 8]; - t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2; - t1 = g + e1(d) + Ch(d,e,f) + 0x12835b01 + W[ 9]; - t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2; - t1 = f + e1(c) + Ch(c,d,e) + 0x243185be + W[10]; - t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2; - t1 = e + e1(b) + Ch(b,c,d) + 0x550c7dc3 + W[11]; - t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2; - t1 = d + e1(a) + Ch(a,b,c) + 0x72be5d74 + W[12]; - t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2; - t1 = c + e1(h) + Ch(h,a,b) + 0x80deb1fe + W[13]; - t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2; - t1 = b + e1(g) + Ch(g,h,a) + 0x9bdc06a7 + W[14]; - t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2; - t1 = a + e1(f) + Ch(f,g,h) + 0xc19bf174 + W[15]; - t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2; - - t1 = h + e1(e) + Ch(e,f,g) + 0xe49b69c1 + W[16]; - t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2; - t1 = g + e1(d) + Ch(d,e,f) + 0xefbe4786 + W[17]; - t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2; - t1 = f + e1(c) + Ch(c,d,e) + 0x0fc19dc6 + W[18]; - t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2; - t1 = e + e1(b) + Ch(b,c,d) + 0x240ca1cc + W[19]; - t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2; - t1 = d + e1(a) + Ch(a,b,c) + 0x2de92c6f + W[20]; - t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2; - t1 = c + e1(h) + Ch(h,a,b) + 0x4a7484aa + W[21]; - t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2; - t1 = b + e1(g) + Ch(g,h,a) + 0x5cb0a9dc + W[22]; - t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2; - t1 = a + e1(f) + Ch(f,g,h) + 0x76f988da + W[23]; - t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2; - - t1 = h + e1(e) + Ch(e,f,g) + 0x983e5152 + W[24]; - t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2; - t1 = g + e1(d) + Ch(d,e,f) + 0xa831c66d + W[25]; - t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2; - t1 = f + e1(c) + Ch(c,d,e) + 0xb00327c8 + W[26]; - t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2; - t1 = e + e1(b) + Ch(b,c,d) + 0xbf597fc7 + W[27]; - t2 = e0(f) + 
Maj(f,g,h); a+=t1; e=t1+t2; - t1 = d + e1(a) + Ch(a,b,c) + 0xc6e00bf3 + W[28]; - t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2; - t1 = c + e1(h) + Ch(h,a,b) + 0xd5a79147 + W[29]; - t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2; - t1 = b + e1(g) + Ch(g,h,a) + 0x06ca6351 + W[30]; - t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2; - t1 = a + e1(f) + Ch(f,g,h) + 0x14292967 + W[31]; - t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2; - - t1 = h + e1(e) + Ch(e,f,g) + 0x27b70a85 + W[32]; - t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2; - t1 = g + e1(d) + Ch(d,e,f) + 0x2e1b2138 + W[33]; - t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2; - t1 = f + e1(c) + Ch(c,d,e) + 0x4d2c6dfc + W[34]; - t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2; - t1 = e + e1(b) + Ch(b,c,d) + 0x53380d13 + W[35]; - t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2; - t1 = d + e1(a) + Ch(a,b,c) + 0x650a7354 + W[36]; - t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2; - t1 = c + e1(h) + Ch(h,a,b) + 0x766a0abb + W[37]; - t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2; - t1 = b + e1(g) + Ch(g,h,a) + 0x81c2c92e + W[38]; - t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2; - t1 = a + e1(f) + Ch(f,g,h) + 0x92722c85 + W[39]; - t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2; - - t1 = h + e1(e) + Ch(e,f,g) + 0xa2bfe8a1 + W[40]; - t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2; - t1 = g + e1(d) + Ch(d,e,f) + 0xa81a664b + W[41]; - t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2; - t1 = f + e1(c) + Ch(c,d,e) + 0xc24b8b70 + W[42]; - t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2; - t1 = e + e1(b) + Ch(b,c,d) + 0xc76c51a3 + W[43]; - t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2; - t1 = d + e1(a) + Ch(a,b,c) + 0xd192e819 + W[44]; - t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2; - t1 = c + e1(h) + Ch(h,a,b) + 0xd6990624 + W[45]; - t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2; - t1 = b + e1(g) + Ch(g,h,a) + 0xf40e3585 + W[46]; - t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2; - t1 = a + e1(f) + Ch(f,g,h) + 0x106aa070 + W[47]; - t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2; - - t1 = h + e1(e) + Ch(e,f,g) + 0x19a4c116 + W[48]; - t2 = e0(a) + Maj(a,b,c); d+=t1; 
h=t1+t2; - t1 = g + e1(d) + Ch(d,e,f) + 0x1e376c08 + W[49]; - t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2; - t1 = f + e1(c) + Ch(c,d,e) + 0x2748774c + W[50]; - t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2; - t1 = e + e1(b) + Ch(b,c,d) + 0x34b0bcb5 + W[51]; - t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2; - t1 = d + e1(a) + Ch(a,b,c) + 0x391c0cb3 + W[52]; - t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2; - t1 = c + e1(h) + Ch(h,a,b) + 0x4ed8aa4a + W[53]; - t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2; - t1 = b + e1(g) + Ch(g,h,a) + 0x5b9cca4f + W[54]; - t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2; - t1 = a + e1(f) + Ch(f,g,h) + 0x682e6ff3 + W[55]; - t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2; - - t1 = h + e1(e) + Ch(e,f,g) + 0x748f82ee + W[56]; - t2 = e0(a) + Maj(a,b,c); d+=t1; h=t1+t2; - t1 = g + e1(d) + Ch(d,e,f) + 0x78a5636f + W[57]; - t2 = e0(h) + Maj(h,a,b); c+=t1; g=t1+t2; - t1 = f + e1(c) + Ch(c,d,e) + 0x84c87814 + W[58]; - t2 = e0(g) + Maj(g,h,a); b+=t1; f=t1+t2; - t1 = e + e1(b) + Ch(b,c,d) + 0x8cc70208 + W[59]; - t2 = e0(f) + Maj(f,g,h); a+=t1; e=t1+t2; - t1 = d + e1(a) + Ch(a,b,c) + 0x90befffa + W[60]; - t2 = e0(e) + Maj(e,f,g); h+=t1; d=t1+t2; - t1 = c + e1(h) + Ch(h,a,b) + 0xa4506ceb + W[61]; - t2 = e0(d) + Maj(d,e,f); g+=t1; c=t1+t2; - t1 = b + e1(g) + Ch(g,h,a) + 0xbef9a3f7 + W[62]; - t2 = e0(c) + Maj(c,d,e); f+=t1; b=t1+t2; - t1 = a + e1(f) + Ch(f,g,h) + 0xc67178f2 + W[63]; - t2 = e0(b) + Maj(b,c,d); e+=t1; a=t1+t2; - - state[0] += a; state[1] += b; state[2] += c; state[3] += d; - state[4] += e; state[5] += f; state[6] += g; state[7] += h; - -#if 0 - /* clear any sensitive info... 
*/ - a = b = c = d = e = f = g = h = t1 = t2 = 0; - memset(W, 0, 64 * sizeof(u32)); -#endif -} - -static void runhash(void *state, const void *input, const void *init) -{ - memcpy(state, init, 32); - sha256_transform(state, input); -} - -const uint32_t sha256_init_state[8] = { - 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, - 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 -}; - -/* suspiciously similar to ScanHash* from bitcoin */ -bool scanhash_c(struct thr_info*thr, const unsigned char *midstate, unsigned char *data, - unsigned char *hash1, unsigned char *hash, - const unsigned char *target, - uint32_t max_nonce, uint32_t *last_nonce, - uint32_t n) -{ - uint32_t *hash32 = (uint32_t *) hash; - uint32_t *nonce = (uint32_t *)(data + 76); - unsigned long stat_ctr = 0; - - data += 64; - - while (1) { - n++; - *nonce = n; - - runhash(hash1, data, midstate); - runhash(hash, hash1, sha256_init_state); - - stat_ctr++; - - if (unlikely((hash32[7] == 0) && fulltest(hash, target))) { - *last_nonce = n; - return true; - } - - if ((n >= max_nonce) || thr->work_restart) { - *last_nonce = n; - return false; - } - } -} - diff --git a/sha256_sse2_amd64.c b/sha256_sse2_amd64.c deleted file mode 100644 index 65a9390c..00000000 --- a/sha256_sse2_amd64.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * SHA-256 driver for ASM routine for x86_64 on Linux - * Copyright (c) Mark Crichton - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- * - */ - -#include "driver-cpu.h" - -#ifdef WANT_X8664_SSE2 - -#include -#include - -#include -#include -#include - -extern void sha256_sse2_64_new (__m128i *res, __m128i *res1, __m128i *data, const uint32_t init[8]); - -static uint32_t g_sha256_k[]__attribute__((aligned(0x100))) = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, /* 0 */ - 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, /* 8 */ - 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, /* 16 */ - 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, /* 24 */ - 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, /* 32 */ - 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, /* 40 */ - 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, /* 48 */ - 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, /* 56 */ - 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -}; - - -const uint32_t sha256_init[8]__attribute__((aligned(0x100))) = -{0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19}; - -__m128i g_4sha256_k[64]; -__m128i sha256_consts_m128i[64]__attribute__((aligned(0x1000))); - -bool scanhash_sse2_64(struct thr_info*thr, const unsigned char *pmidstate, - unsigned char *pdata, - unsigned char *phash1, unsigned char *phash, - const unsigned char *ptarget, - uint32_t max_nonce, uint32_t *last_nonce, - uint32_t nonce) -{ - uint32_t *nNonce_p = (uint32_t *)(pdata + 76); - uint32_t m_midstate[8], m_w[16], m_w1[16]; - __m128i m_4w[64] __attribute__ ((aligned (0x100))); - __m128i m_4hash[64] __attribute__ ((aligned (0x100))); - __m128i m_4hash1[64] __attribute__ ((aligned (0x100))); - __m128i offset; - int i; - - pdata += 64; - - /* For debugging */ - 
union { - __m128i m; - uint32_t i[4]; - } mi; - - /* Message expansion */ - memcpy(m_midstate, pmidstate, sizeof(m_midstate)); - memcpy(m_w, pdata, sizeof(m_w)); /* The 2nd half of the data */ - memcpy(m_w1, phash1, sizeof(m_w1)); - memset(m_4hash, 0, sizeof(m_4hash)); - - /* Transmongrify */ - for (i = 0; i < 16; i++) - m_4w[i] = _mm_set1_epi32(m_w[i]); - - for (i = 0; i < 16; i++) - m_4hash1[i] = _mm_set1_epi32(m_w1[i]); - - for (i = 0; i < 64; i++) - sha256_consts_m128i[i] = _mm_set1_epi32(g_sha256_k[i]); - - offset = _mm_set_epi32(0x3, 0x2, 0x1, 0x0); - - for (;;) - { - int j; - - m_4w[3] = _mm_add_epi32(offset, _mm_set1_epi32(nonce)); - - sha256_sse2_64_new (m_4hash, m_4hash1, m_4w, m_midstate); - - for (j = 0; j < 4; j++) { - mi.m = m_4hash[7]; - if (unlikely(mi.i[j] == 0)) - break; - } - - /* If j = true, we found a hit...so check it */ - /* Use the C version for a check... */ - if (unlikely(j != 4)) { - for (i = 0; i < 8; i++) { - mi.m = m_4hash[i]; - *(uint32_t *)&(phash)[i*4] = mi.i[j]; - } - - if (fulltest(phash, ptarget)) { - nonce += j; - *last_nonce = nonce + 1; - *nNonce_p = nonce; - return true; - } - } - - if (unlikely((nonce >= max_nonce) || thr->work_restart)) - { - *last_nonce = nonce; - return false; - } - - nonce += 4; - } -} - -#endif /* WANT_X8664_SSE2 */ - diff --git a/sha256_sse2_i386.c b/sha256_sse2_i386.c deleted file mode 100644 index 30d9ef17..00000000 --- a/sha256_sse2_i386.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * SHA-256 driver for ASM routine for x86_64 on Linux - * Copyright (c) Mark Crichton - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- * - */ - -#include "driver-cpu.h" - -#ifdef WANT_X8632_SSE2 - -#include -#include - -#include -#include -#include - -extern void CalcSha256_x86 (__m128i *res, __m128i *data, const uint32_t init[8])__attribute__((fastcall)); - -static uint32_t g_sha256_k[]__attribute__((aligned(0x100))) = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, /* 0 */ - 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, /* 8 */ - 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, /* 16 */ - 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, /* 24 */ - 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, /* 32 */ - 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, /* 40 */ - 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, /* 48 */ - 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, /* 56 */ - 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -}; - - -const uint32_t sha256_32init[8]__attribute__((aligned(0x100))) = -{0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19}; - -__m128i g_4sha256_k[64]; -__m128i sha256_consts_m128i[64]__attribute__((aligned(0x1000))); - -bool scanhash_sse2_32(struct thr_info*thr, const unsigned char *pmidstate, - unsigned char *pdata, - unsigned char *phash1, unsigned char *phash, - const unsigned char *ptarget, - uint32_t max_nonce, uint32_t *last_nonce, - uint32_t nonce) -{ - uint32_t *nNonce_p = (uint32_t *)(pdata + 76); - uint32_t m_midstate[8], m_w[16], m_w1[16]; - __m128i m_4w[64] __attribute__ ((aligned (0x100))); - __m128i m_4hash[64] __attribute__ ((aligned (0x100))); - __m128i m_4hash1[64] __attribute__ ((aligned (0x100))); - __m128i offset; - int i; - - pdata += 64; - - /* Message 
expansion */ - memcpy(m_midstate, pmidstate, sizeof(m_midstate)); - memcpy(m_w, pdata, sizeof(m_w)); /* The 2nd half of the data */ - memcpy(m_w1, phash1, sizeof(m_w1)); - memset(m_4hash, 0, sizeof(m_4hash)); - - /* Transmongrify */ - for (i = 0; i < 16; i++) - m_4w[i] = _mm_set1_epi32(m_w[i]); - - for (i = 0; i < 16; i++) - m_4hash1[i] = _mm_set1_epi32(m_w1[i]); - - for (i = 0; i < 64; i++) - sha256_consts_m128i[i] = _mm_set1_epi32(g_sha256_k[i]); - - offset = _mm_set_epi32(0x3, 0x2, 0x1, 0x0); - - for (;;) - { - int j; - - m_4w[3] = _mm_add_epi32(offset, _mm_set1_epi32(nonce)); - - /* Some optimization can be done here W.R.T. precalculating some hash */ - CalcSha256_x86 (m_4hash1, m_4w, m_midstate); - CalcSha256_x86 (m_4hash, m_4hash1, sha256_32init); - - for (j = 0; j < 4; j++) { - if (unlikely(((uint32_t *)&(m_4hash[7]))[j] == 0)) { - /* We found a hit...so check it */ - /* Use the C version for a check... */ - - for (i = 0; i < 8; i++) { - *(uint32_t *)&(phash)[i<<2] = ((uint32_t *)&(m_4hash[i]))[j]; - } - - if (fulltest(phash, ptarget)) { - nonce += j; - *last_nonce = nonce; - *nNonce_p = nonce; - return true; - } - } - } - - if (unlikely((nonce >= max_nonce) || thr->work_restart)) { - *last_nonce = nonce; - return false; - } - - nonce += 4; - - } -} - -#endif /* WANT_X8632_SSE2 */ - diff --git a/sha256_sse4_amd64.c b/sha256_sse4_amd64.c deleted file mode 100644 index 78d74904..00000000 --- a/sha256_sse4_amd64.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * SHA-256 driver for ASM routine for x86_64 on Linux - * Copyright (c) Mark Crichton - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- * - */ - -#include "driver-cpu.h" - -#ifdef WANT_X8664_SSE4 - -#include -#include - -#include -#include -#include - -extern void CalcSha256_x64_sse4(__m128i *res, __m128i *data, uint32_t init[8]); - -static uint32_t g_sha256_k[] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, /* 0 */ - 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, /* 8 */ - 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, /* 16 */ - 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, /* 24 */ - 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, /* 32 */ - 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, /* 40 */ - 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, /* 48 */ - 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, /* 56 */ - 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -}; - - -static uint32_t g_sha256_hinit[8] = -{0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19}; - -__m128i g_4sha256_k[64]; - -bool scanhash_sse4_64(struct thr_info*thr, const unsigned char *pmidstate, - unsigned char *pdata, - unsigned char *phash1, unsigned char *phash, - const unsigned char *ptarget, - uint32_t max_nonce, uint32_t *last_nonce, - uint32_t nonce) -{ - uint32_t *nNonce_p = (uint32_t *)(pdata + 76); - uint32_t m_midstate[8], m_w[16], m_w1[16]; - __m128i m_4w[64], m_4hash[64], m_4hash1[64]; - __m128i offset; - int i; - - pdata += 64; - - /* For debugging */ - union { - __m128i m; - uint32_t i[4]; - } mi; - - /* Message expansion */ - memcpy(m_midstate, pmidstate, sizeof(m_midstate)); - memcpy(m_w, pdata, sizeof(m_w)); /* The 2nd half of the data */ - memcpy(m_w1, phash1, sizeof(m_w1)); - memset(m_4hash, 0, 
sizeof(m_4hash)); - - /* Transmongrify */ - for (i = 0; i < 16; i++) - m_4w[i] = _mm_set1_epi32(m_w[i]); - - for (i = 0; i < 16; i++) - m_4hash1[i] = _mm_set1_epi32(m_w1[i]); - - for (i = 0; i < 64; i++) - g_4sha256_k[i] = _mm_set1_epi32(g_sha256_k[i]); - - offset = _mm_set_epi32(0x3, 0x2, 0x1, 0x0); - - for (;;) - { - int j; - - m_4w[3] = _mm_add_epi32(offset, _mm_set1_epi32(nonce)); - - /* Some optimization can be done here W.R.T. precalculating some hash */ - CalcSha256_x64_sse4(m_4hash1, m_4w, m_midstate); - CalcSha256_x64_sse4(m_4hash, m_4hash1, g_sha256_hinit); - - for (j = 0; j < 4; j++) { - mi.m = m_4hash[7]; - if (unlikely(mi.i[j] == 0)) - break; - } - - /* If j = true, we found a hit...so check it */ - /* Use the C version for a check... */ - if (unlikely(j != 4)) { - for (i = 0; i < 8; i++) { - mi.m = m_4hash[i]; - *(uint32_t *)&(phash)[i*4] = mi.i[j]; - } - - if (fulltest(phash, ptarget)) { - nonce += j; - *last_nonce = nonce; - *nNonce_p = nonce; - return true; - } - } - - if (unlikely((nonce >= max_nonce) || thr->work_restart)) - { - *last_nonce = nonce; - return false; - } - - nonce += 4; - } -} - -#endif /* WANT_X8664_SSE4 */ - diff --git a/sha256_via.c b/sha256_via.c deleted file mode 100644 index c58ba695..00000000 --- a/sha256_via.c +++ /dev/null @@ -1,85 +0,0 @@ - -#include "driver-cpu.h" - -#include -#include -#include -#include -#include -#include "miner.h" - -#ifdef WANT_VIA_PADLOCK - -static void via_sha256(void *hash, void *buf, unsigned len) -{ - unsigned stat = 0; - asm volatile(".byte 0xf3, 0x0f, 0xa6, 0xd0" - :"+S"(buf), "+a"(stat) - :"c"(len), "D" (hash) - :"memory"); -} - -bool scanhash_via(struct thr_info*thr, const unsigned char __maybe_unused *pmidstate, - unsigned char *data_inout, - unsigned char __maybe_unused *phash1, unsigned char __maybe_unused *phash, - const unsigned char *target, - uint32_t max_nonce, uint32_t *last_nonce, - uint32_t n) -{ - unsigned char data[128] __attribute__((aligned(128))); - unsigned char 
tmp_hash[32] __attribute__((aligned(128))); - unsigned char tmp_hash1[32] __attribute__((aligned(128))); - uint32_t *data32 = (uint32_t *) data; - uint32_t *hash32 = (uint32_t *) tmp_hash; - uint32_t *nonce = (uint32_t *)(data + 64 + 12); - unsigned long stat_ctr = 0; - int i; - - /* bitcoin gives us big endian input, but via wants LE, - * so we reverse the swapping bitcoin has already done (extra work) - * in order to permit the hardware to swap everything - * back to BE again (extra work). - */ - for (i = 0; i < 128/4; i++) - data32[i] = swab32(((uint32_t *)data_inout)[i]); - - while (1) { - n++; - *nonce = n; - - /* first SHA256 transform */ - memcpy(tmp_hash1, sha256_init_state, 32); - via_sha256(tmp_hash1, data, 80); /* or maybe 128? */ - - for (i = 0; i < 32/4; i++) - ((uint32_t *)tmp_hash1)[i] = - swab32(((uint32_t *)tmp_hash1)[i]); - - /* second SHA256 transform */ - memcpy(tmp_hash, sha256_init_state, 32); - via_sha256(tmp_hash, tmp_hash1, 32); - - stat_ctr++; - - if (unlikely((hash32[7] == 0) && fulltest(tmp_hash, target))) { - /* swap nonce'd data back into original storage area; - * TODO: only swap back the nonce, rather than all data - */ - for (i = 0; i < 128/4; i++) { - uint32_t *dout32 = (uint32_t *) data_inout; - dout32[i] = swab32(data32[i]); - } - - *last_nonce = n; - return true; - } - - if ((n >= max_nonce) || thr->work_restart) { - *last_nonce = n; - return false; - } - } -} - -#endif /* WANT_VIA_PADLOCK */ - diff --git a/x86_32/.gitignore b/x86_32/.gitignore deleted file mode 100644 index 1a571afc..00000000 --- a/x86_32/.gitignore +++ /dev/null @@ -1 +0,0 @@ -libx8632.a diff --git a/x86_32/Makefile.am b/x86_32/Makefile.am deleted file mode 100644 index 8916a305..00000000 --- a/x86_32/Makefile.am +++ /dev/null @@ -1,8 +0,0 @@ -noinst_LIBRARIES = libx8632.a - -SUFFIXES = .asm - -libx8632_a_SOURCES = sha256_xmm.asm - -.asm.o: - $(YASM) -f $(YASM_FMT) $< diff --git a/x86_32/sha256_xmm.asm b/x86_32/sha256_xmm.asm deleted file mode 100644 index 
3e9c9283..00000000 --- a/x86_32/sha256_xmm.asm +++ /dev/null @@ -1,259 +0,0 @@ -;; SHA-256 for X86 for Linux, based off of:A - -; (c) Ufasoft 2011 http://ufasoft.com mailto:support@ufasoft.com -; Version 2011 -; This software is Public Domain - -; SHA-256 CPU SSE cruncher for Bitcoin Miner - -ALIGN 32 -BITS 32 - -%define hash ecx -%define data edx -%define init esi - -; 0 = (1024 - 256) (mod (LAB_CALC_UNROLL*LAB_CALC_PARA*16)) -%define LAB_CALC_PARA 2 -%define LAB_CALC_UNROLL 24 - -%define LAB_LOOP_UNROLL 64 - -extern _sha256_consts_m128i - -global $@CalcSha256_x86@12 -; CalcSha256 hash(ecx), data(edx), init([esp+4]) -@CalcSha256_x86@12: - push esi - push edi - mov init, [esp+12] - -LAB_SHA: - lea edi, qword [data+256] ; + 256 - -LAB_CALC: -%macro lab_calc_blk 1 - movdqa xmm0, [edi-(15-%1)*16] ; xmm0 = W[I-15] - movdqa xmm4, [edi-(15-(%1+1))*16] ; xmm4 = W[I-15+1] - movdqa xmm2, xmm0 ; xmm2 = W[I-15] - movdqa xmm6, xmm4 ; xmm6 = W[I-15+1] - psrld xmm0, 3 ; xmm0 = W[I-15] >> 3 - psrld xmm4, 3 ; xmm4 = W[I-15+1] >> 3 - movdqa xmm1, xmm0 ; xmm1 = W[I-15] >> 3 - movdqa xmm5, xmm4 ; xmm5 = W[I-15+1] >> 3 - pslld xmm2, 14 ; xmm2 = W[I-15] << 14 - pslld xmm6, 14 ; xmm6 = W[I-15+1] << 14 - psrld xmm1, 4 ; xmm1 = W[I-15] >> 7 - psrld xmm5, 4 ; xmm5 = W[I-15+1] >> 7 - pxor xmm0, xmm1 ; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) - pxor xmm4, xmm5 ; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) - psrld xmm1, 11 ; xmm1 = W[I-15] >> 18 - psrld xmm5, 11 ; xmm5 = W[I-15+1] >> 18 - pxor xmm0, xmm2 ; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) ^ (W[I-15] << 14) - pxor xmm4, xmm6 ; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) ^ (W[I-15+1] << 14) - pslld xmm2, 11 ; xmm2 = W[I-15] << 25 - pslld xmm6, 11 ; xmm6 = W[I-15+1] << 25 - pxor xmm0, xmm1 ; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) ^ (W[I-15] << 14) ^ (W[I-15] >> 18) - pxor xmm4, xmm5 ; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) ^ (W[I-15+1] << 14) ^ (W[I-15+1] >> 18) - pxor xmm0, xmm2 ; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) ^ (W[I-15] << 14) ^ 
(W[I-15] >> 18) ^ (W[I-15] << 25) - pxor xmm4, xmm6 ; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) ^ (W[I-15+1] << 14) ^ (W[I-15+1] >> 18) ^ (W[I-15+1] << 25) - - movdqa xmm3, [edi-(2-%1)*16] ; xmm3 = W[I-2] - movdqa xmm7, [edi-(2-(%1+1))*16] ; xmm7 = W[I-2+1] - - paddd xmm0, [edi-(16-%1)*16] ; xmm0 = s0(W[I-15]) + W[I-16] - paddd xmm4, [edi-(16-(%1+1))*16] ; xmm4 = s0(W[I-15+1]) + W[I-16+1] - -;;;;;;;;;;;;;;;;;; - - movdqa xmm2, xmm3 ; xmm2 = W[I-2] - movdqa xmm6, xmm7 ; xmm6 = W[I-2+1] - psrld xmm3, 10 ; xmm3 = W[I-2] >> 10 - psrld xmm7, 10 ; xmm7 = W[I-2+1] >> 10 - movdqa xmm1, xmm3 ; xmm1 = W[I-2] >> 10 - movdqa xmm5, xmm7 ; xmm5 = W[I-2+1] >> 10 - - paddd xmm0, [edi-(7-%1)*16] ; xmm0 = s0(W[I-15]) + W[I-16] + W[I-7] - - pslld xmm2, 13 ; xmm2 = W[I-2] << 13 - pslld xmm6, 13 ; xmm6 = W[I-2+1] << 13 - psrld xmm1, 7 ; xmm1 = W[I-2] >> 17 - psrld xmm5, 7 ; xmm5 = W[I-2+1] >> 17 - - paddd xmm4, [edi-(7-(%1+1))*16] ; xmm4 = s0(W[I-15+1]) + W[I-16+1] + W[I-7+1] - - pxor xmm3, xmm1 ; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) - pxor xmm7, xmm5 ; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) - psrld xmm1, 2 ; xmm1 = W[I-2] >> 19 - psrld xmm5, 2 ; xmm5 = W[I-2+1] >> 19 - pxor xmm3, xmm2 ; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) ^ (W[I-2] << 13) - pxor xmm7, xmm6 ; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) ^ (W[I-2+1] << 13) - pslld xmm2, 2 ; xmm2 = W[I-2] << 15 - pslld xmm6, 2 ; xmm6 = W[I-2+1] << 15 - pxor xmm3, xmm1 ; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) ^ (W[I-2] << 13) ^ (W[I-2] >> 19) - pxor xmm7, xmm5 ; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) ^ (W[I-2+1] << 13) ^ (W[I-2+1] >> 19) - pxor xmm3, xmm2 ; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) ^ (W[I-2] << 13) ^ (W[I-2] >> 19) ^ (W[I-2] << 15) - pxor xmm7, xmm6 ; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) ^ (W[I-2+1] << 13) ^ (W[I-2+1] >> 19) ^ (W[I-2+1] << 15) - - paddd xmm0, xmm3 ; xmm0 = s0(W[I-15]) + W[I-16] + s1(W[I-2]) + W[I-7] - paddd xmm4, xmm7 ; xmm4 = s0(W[I-15+1]) + W[I-16+1] + s1(W[I-2+1]) + W[I-7+1] - movdqa 
[edi+(%1*16)], xmm0 - movdqa [edi+((%1+1)*16)], xmm4 -%endmacro - -%assign i 0 -%rep LAB_CALC_UNROLL - lab_calc_blk i -%assign i i+LAB_CALC_PARA -%endrep - -; Load the init values of the message into the hash. - - movdqa xmm7, [init] - pshufd xmm5, xmm7, 0x55 ; xmm5 == b - pshufd xmm4, xmm7, 0xAA ; xmm4 == c - pshufd xmm3, xmm7, 0xFF ; xmm3 == d - pshufd xmm7, xmm7, 0 ; xmm7 == a - - movdqa xmm0, [init+4*4] - pshufd xmm1, xmm0, 0x55 ; [hash+0*16] == f - movdqa [hash+0*16], xmm1 - - pshufd xmm1, xmm0, 0xAA ; [hash+1*16] == g - movdqa [hash+1*16], xmm1 - - pshufd xmm1, xmm0, 0xFF ; [hash+2*16] == h - movdqa [hash+2*16], xmm1 - - pshufd xmm0, xmm0, 0 ; xmm0 == e - - -LAB_LOOP: - -;; T t1 = h + (Rotr32(e, 6) ^ Rotr32(e, 11) ^ Rotr32(e, 25)) + ((e & f) ^ AndNot(e, g)) + Expand32(g_sha256_k[j]) + w[j] - -%macro lab_loop_blk 1 - movdqa xmm6, [data+%1] - paddd xmm6, _sha256_consts_m128i[%1] - - paddd xmm6, [hash+2*16] ; +h - - movdqa xmm1, xmm0 - movdqa xmm2, [hash+1*16] - pandn xmm1, xmm2 ; ~e & g - - movdqa [hash+2*16], xmm2 ; h = g - movdqa xmm2, [hash+0*16] ; f - movdqa [hash+1*16], xmm2 ; g = f - - - pand xmm2, xmm0 ; e & f - pxor xmm1, xmm2 ; (e & f) ^ (~e & g) - movdqa [hash+0*16], xmm0 ; f = e - - paddd xmm6, xmm1 ; Ch + h + w[i] + k[i] - - movdqa xmm1, xmm0 - psrld xmm0, 6 - movdqa xmm2, xmm0 - pslld xmm1, 7 - psrld xmm2, 5 - pxor xmm0, xmm1 - pxor xmm0, xmm2 - pslld xmm1, 14 - psrld xmm2, 14 - pxor xmm0, xmm1 - pxor xmm0, xmm2 - pslld xmm1, 5 - pxor xmm0, xmm1 ; Rotr32(e, 6) ^ Rotr32(e, 11) ^ Rotr32(e, 25) - paddd xmm6, xmm0 ; xmm6 = t1 - - movdqa xmm0, xmm3 ; d - paddd xmm0, xmm6 ; e = d+t1 - - movdqa xmm1, xmm5 ; =b - movdqa xmm3, xmm4 ; d = c - movdqa xmm2, xmm4 ; c - pand xmm2, xmm5 ; b & c - pand xmm4, xmm7 ; a & c - pand xmm1, xmm7 ; a & b - pxor xmm1, xmm4 - movdqa xmm4, xmm5 ; c = b - movdqa xmm5, xmm7 ; b = a - pxor xmm1, xmm2 ; (a & c) ^ (a & d) ^ (c & d) - paddd xmm6, xmm1 ; t1 + ((a & c) ^ (a & d) ^ (c & d)) - - movdqa xmm2, xmm7 - psrld xmm7, 2 - 
movdqa xmm1, xmm7 - pslld xmm2, 10 - psrld xmm1, 11 - pxor xmm7, xmm2 - pxor xmm7, xmm1 - pslld xmm2, 9 - psrld xmm1, 9 - pxor xmm7, xmm2 - pxor xmm7, xmm1 - pslld xmm2, 11 - pxor xmm7, xmm2 - paddd xmm7, xmm6 ; a = t1 + (Rotr32(a, 2) ^ Rotr32(a, 13) ^ Rotr32(a, 22)) + ((a & c) ^ (a & d) ^ (c & d)); -%endmacro - -%assign i 0 -%rep LAB_LOOP_UNROLL - lab_loop_blk i -%assign i i+16 -%endrep - -; Finished the 64 rounds, calculate hash and save - - movdqa xmm1, [init+16] - - pshufd xmm2, xmm1, 0xFF - movdqa xmm6, [hash+2*16] - paddd xmm2, xmm6 - movdqa [hash+7*16], xmm2 - - pshufd xmm2, xmm1, 0xAA - movdqa xmm6, [hash+1*16] - paddd xmm2, xmm6 - movdqa [hash+6*16], xmm2 - - pshufd xmm2, xmm1, 0x55 - movdqa xmm6, [hash+0*16] - paddd xmm2, xmm6 - movdqa [hash+5*16], xmm2 - - pshufd xmm1, xmm1, 0 - paddd xmm0, xmm1 - movdqa [hash+4*16], xmm0 - - movdqa xmm1, [init] - - pshufd xmm2, xmm1, 0xFF - paddd xmm3, xmm2 - movdqa [hash+3*16], xmm3 - - pshufd xmm2, xmm1, 0xAA - paddd xmm4, xmm2 - movdqa [hash+2*16], xmm4 - - pshufd xmm2, xmm1, 0x55 - paddd xmm5, xmm2 - movdqa [hash+1*16], xmm5 - - pshufd xmm1, xmm1, 0 - paddd xmm7, xmm1 - movdqa [hash+0*16], xmm7 - -LAB_RET: - pop edi - pop esi - retn 4 - -%ifidn __OUTPUT_FORMAT__,elf -section .note.GNU-stack noalloc noexec nowrite progbits -%endif -%ifidn __OUTPUT_FORMAT__,elf32 -section .note.GNU-stack noalloc noexec nowrite progbits -%endif diff --git a/x86_64/.gitignore b/x86_64/.gitignore deleted file mode 100644 index a966652f..00000000 --- a/x86_64/.gitignore +++ /dev/null @@ -1 +0,0 @@ -libx8664.a diff --git a/x86_64/Makefile.am b/x86_64/Makefile.am deleted file mode 100644 index 85d997c8..00000000 --- a/x86_64/Makefile.am +++ /dev/null @@ -1,8 +0,0 @@ -noinst_LIBRARIES = libx8664.a - -SUFFIXES = .asm - -libx8664_a_SOURCES = sha256_xmm_amd64.asm sha256_sse4_amd64.asm - -.asm.o: - $(YASM) -f $(YASM_FMT) -o $@ $< diff --git a/x86_64/sha256_sse4_amd64.asm b/x86_64/sha256_sse4_amd64.asm deleted file mode 100644 index 
f1f5d75d..00000000 --- a/x86_64/sha256_sse4_amd64.asm +++ /dev/null @@ -1,292 +0,0 @@ -;; SHA-256 for X86-64 for Linux, based off of: - -; (c) Ufasoft 2011 http://ufasoft.com mailto:support@ufasoft.com -; Version 2011 -; This software is Public Domain - -; Significant re-write/optimisation and reordering by, -; Neil Kettle -; ~18% performance improvement - -; SHA-256 CPU SSE cruncher for Bitcoin Miner - -ALIGN 32 -BITS 64 - -%ifidn __OUTPUT_FORMAT__,win64 -%define hash rcx -%define data rdx -%define init r8 -%define temp r9 -%else -%define hash rdi -%define data rsi -%define init rdx -%define temp rcx -%endif - -; 0 = (1024 - 256) (mod (LAB_CALC_UNROLL*LAB_CALC_PARA*16)) -%define LAB_CALC_PARA 2 -%define LAB_CALC_UNROLL 8 - -%define LAB_LOOP_UNROLL 8 - -extern g_4sha256_k - -global CalcSha256_x64_sse4 -; CalcSha256 hash(rdi), data(rsi), init(rdx) -; CalcSha256 hash(rcx), data(rdx), init(r8) -CalcSha256_x64_sse4: - - push rbx -%ifidn __OUTPUT_FORMAT__,win64 - sub rsp, 16 * 6 - movdqa [rsp + 16*0], xmm6 - movdqa [rsp + 16*1], xmm7 - movdqa [rsp + 16*2], xmm8 - movdqa [rsp + 16*3], xmm9 - movdqa [rsp + 16*4], xmm10 - movdqa [rsp + 16*5], xmm11 -%endif - -LAB_NEXT_NONCE: - - mov temp, 64*4 ; 256 - temp is # of SHA-2 rounds - mov rax, 16*4 ; 64 - rax is where we expand to - -LAB_SHA: - push temp - lea temp, qword [data+temp*4] ; + 1024 - lea r11, qword [data+rax*4] ; + 256 - -LAB_CALC: -%macro lab_calc_blk 1 - - movntdqa xmm0, [r11-(15-%1)*16] ; xmm0 = W[I-15] - movdqa xmm2, xmm0 ; xmm2 = W[I-15] - movntdqa xmm4, [r11-(15-(%1+1))*16] ; xmm4 = W[I-15+1] - movdqa xmm6, xmm4 ; xmm6 = W[I-15+1] - - psrld xmm0, 3 ; xmm0 = W[I-15] >> 3 - movdqa xmm1, xmm0 ; xmm1 = W[I-15] >> 3 - pslld xmm2, 14 ; xmm2 = W[I-15] << 14 - psrld xmm4, 3 ; xmm4 = W[I-15+1] >> 3 - movdqa xmm5, xmm4 ; xmm5 = W[I-15+1] >> 3 - psrld xmm5, 4 ; xmm5 = W[I-15+1] >> 7 - pxor xmm4, xmm5 ; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) - pslld xmm6, 14 ; xmm6 = W[I-15+1] << 14 - psrld xmm1, 4 ; xmm1 = W[I-15] >> 
7 - pxor xmm0, xmm1 ; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) - pxor xmm0, xmm2 ; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) ^ (W[I-15] << 14) - psrld xmm1, 11 ; xmm1 = W[I-15] >> 18 - psrld xmm5, 11 ; xmm5 = W[I-15+1] >> 18 - pxor xmm4, xmm6 ; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) ^ (W[I-15+1] << 14) - pxor xmm4, xmm5 ; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) ^ (W[I-15+1] << 14) ^ (W[I-15+1] >> 18) - pslld xmm2, 11 ; xmm2 = W[I-15] << 25 - pslld xmm6, 11 ; xmm6 = W[I-15+1] << 25 - pxor xmm4, xmm6 ; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) ^ (W[I-15+1] << 14) ^ (W[I-15+1] >> 18) ^ (W[I-15+1] << 25) - pxor xmm0, xmm1 ; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) ^ (W[I-15] << 14) ^ (W[I-15] >> 18) - pxor xmm0, xmm2 ; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) ^ (W[I-15] << 14) ^ (W[I-15] >> 18) ^ (W[I-15] << 25) - paddd xmm0, [r11-(16-%1)*16] ; xmm0 = s0(W[I-15]) + W[I-16] - paddd xmm4, [r11-(16-(%1+1))*16] ; xmm4 = s0(W[I-15+1]) + W[I-16+1] - movntdqa xmm3, [r11-(2-%1)*16] ; xmm3 = W[I-2] - movntdqa xmm7, [r11-(2-(%1+1))*16] ; xmm7 = W[I-2+1] - -;;;;;;;;;;;;;;;;;; - - movdqa xmm2, xmm3 ; xmm2 = W[I-2] - psrld xmm3, 10 ; xmm3 = W[I-2] >> 10 - movdqa xmm1, xmm3 ; xmm1 = W[I-2] >> 10 - movdqa xmm6, xmm7 ; xmm6 = W[I-2+1] - psrld xmm7, 10 ; xmm7 = W[I-2+1] >> 10 - movdqa xmm5, xmm7 ; xmm5 = W[I-2+1] >> 10 - - paddd xmm0, [r11-(7-%1)*16] ; xmm0 = s0(W[I-15]) + W[I-16] + W[I-7] - paddd xmm4, [r11-(7-(%1+1))*16] ; xmm4 = s0(W[I-15+1]) + W[I-16+1] + W[I-7+1] - - pslld xmm2, 13 ; xmm2 = W[I-2] << 13 - pslld xmm6, 13 ; xmm6 = W[I-2+1] << 13 - psrld xmm1, 7 ; xmm1 = W[I-2] >> 17 - psrld xmm5, 7 ; xmm5 = W[I-2+1] >> 17 - - - - pxor xmm3, xmm1 ; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) - psrld xmm1, 2 ; xmm1 = W[I-2] >> 19 - pxor xmm3, xmm2 ; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) ^ (W[I-2] << 13) - pslld xmm2, 2 ; xmm2 = W[I-2] << 15 - pxor xmm7, xmm5 ; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) - psrld xmm5, 2 ; xmm5 = W[I-2+1] >> 19 - pxor xmm7, xmm6 ; xmm7 = (W[I-2+1] >> 10) ^ 
(W[I-2+1] >> 17) ^ (W[I-2+1] << 13) - pslld xmm6, 2 ; xmm6 = W[I-2+1] << 15 - - - - pxor xmm3, xmm1 ; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) ^ (W[I-2] << 13) ^ (W[I-2] >> 19) - pxor xmm3, xmm2 ; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) ^ (W[I-2] << 13) ^ (W[I-2] >> 19) ^ (W[I-2] << 15) - paddd xmm0, xmm3 ; xmm0 = s0(W[I-15]) + W[I-16] + s1(W[I-2]) + W[I-7] - pxor xmm7, xmm5 ; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) ^ (W[I-2+1] << 13) ^ (W[I-2+1] >> 19) - pxor xmm7, xmm6 ; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) ^ (W[I-2+1] << 13) ^ (W[I-2+1] >> 19) ^ (W[I-2+1] << 15) - paddd xmm4, xmm7 ; xmm4 = s0(W[I-15+1]) + W[I-16+1] + s1(W[I-2+1]) + W[I-7+1] - - movdqa [r11+(%1*16)], xmm0 - movdqa [r11+((%1+1)*16)], xmm4 -%endmacro - -%assign i 0 -%rep LAB_CALC_UNROLL - lab_calc_blk i -%assign i i+LAB_CALC_PARA -%endrep - - add r11, LAB_CALC_UNROLL*LAB_CALC_PARA*16 - cmp r11, temp - jb LAB_CALC - - pop temp - mov rax, 0 - -; Load the init values of the message into the hash. - - movntdqa xmm7, [init] - pshufd xmm5, xmm7, 0x55 ; xmm5 == b - pshufd xmm4, xmm7, 0xAA ; xmm4 == c - pshufd xmm3, xmm7, 0xFF ; xmm3 == d - pshufd xmm7, xmm7, 0 ; xmm7 == a - - movntdqa xmm0, [init+4*4] - pshufd xmm8, xmm0, 0x55 ; xmm8 == f - pshufd xmm9, xmm0, 0xAA ; xmm9 == g - pshufd xmm10, xmm0, 0xFF ; xmm10 == h - pshufd xmm0, xmm0, 0 ; xmm0 == e - -LAB_LOOP: - -;; T t1 = h + (Rotr32(e, 6) ^ Rotr32(e, 11) ^ Rotr32(e, 25)) + ((e & f) ^ AndNot(e, g)) + Expand32(g_sha256_k[j]) + w[j] - -%macro lab_loop_blk 0 - movntdqa xmm6, [data+rax*4] - paddd xmm6, g_4sha256_k[rax*4] - add rax, 4 - - paddd xmm6, xmm10 ; +h - - movdqa xmm1, xmm0 - movdqa xmm2, xmm9 - pandn xmm1, xmm2 ; ~e & g - - movdqa xmm10, xmm2 ; h = g - movdqa xmm2, xmm8 ; f - movdqa xmm9, xmm2 ; g = f - - pand xmm2, xmm0 ; e & f - pxor xmm1, xmm2 ; (e & f) ^ (~e & g) - movdqa xmm8, xmm0 ; f = e - - paddd xmm6, xmm1 ; Ch + h + w[i] + k[i] - - movdqa xmm1, xmm0 - psrld xmm0, 6 - movdqa xmm2, xmm0 - pslld xmm1, 7 - psrld xmm2, 5 - pxor xmm0, 
xmm1 - pxor xmm0, xmm2 - pslld xmm1, 14 - psrld xmm2, 14 - pxor xmm0, xmm1 - pxor xmm0, xmm2 - pslld xmm1, 5 - pxor xmm0, xmm1 ; Rotr32(e, 6) ^ Rotr32(e, 11) ^ Rotr32(e, 25) - paddd xmm6, xmm0 ; xmm6 = t1 - - movdqa xmm0, xmm3 ; d - paddd xmm0, xmm6 ; e = d+t1 - - movdqa xmm1, xmm5 ; =b - movdqa xmm3, xmm4 ; d = c - movdqa xmm2, xmm4 ; c - pand xmm2, xmm5 ; b & c - pand xmm4, xmm7 ; a & c - pand xmm1, xmm7 ; a & b - pxor xmm1, xmm4 - movdqa xmm4, xmm5 ; c = b - movdqa xmm5, xmm7 ; b = a - pxor xmm1, xmm2 ; (a & c) ^ (a & d) ^ (c & d) - paddd xmm6, xmm1 ; t1 + ((a & c) ^ (a & d) ^ (c & d)) - - movdqa xmm2, xmm7 - psrld xmm7, 2 - movdqa xmm1, xmm7 - pslld xmm2, 10 - psrld xmm1, 11 - pxor xmm7, xmm2 - pxor xmm7, xmm1 - pslld xmm2, 9 - psrld xmm1, 9 - pxor xmm7, xmm2 - pxor xmm7, xmm1 - pslld xmm2, 11 - pxor xmm7, xmm2 - paddd xmm7, xmm6 ; a = t1 + (Rotr32(a, 2) ^ Rotr32(a, 13) ^ Rotr32(a, 22)) + ((a & c) ^ (a & d) ^ (c & d)); -%endmacro - -%assign i 0 -%rep LAB_LOOP_UNROLL - lab_loop_blk -%assign i i+1 -%endrep - - cmp rax, temp - jb LAB_LOOP - -; Finished the 64 rounds, calculate hash and save - - movntdqa xmm1, [init] - pshufd xmm2, xmm1, 0x55 - paddd xmm5, xmm2 - pshufd xmm6, xmm1, 0xAA - paddd xmm4, xmm6 - pshufd xmm11, xmm1, 0xFF - paddd xmm3, xmm11 - pshufd xmm1, xmm1, 0 - paddd xmm7, xmm1 - - movntdqa xmm1, [init+4*4] - pshufd xmm2, xmm1, 0x55 - paddd xmm8, xmm2 - pshufd xmm6, xmm1, 0xAA - paddd xmm9, xmm6 - pshufd xmm11, xmm1, 0xFF - paddd xmm10, xmm11 - pshufd xmm1, xmm1, 0 - paddd xmm0, xmm1 - - movdqa [hash+0*16], xmm7 - movdqa [hash+1*16], xmm5 - movdqa [hash+2*16], xmm4 - movdqa [hash+3*16], xmm3 - movdqa [hash+4*16], xmm0 - movdqa [hash+5*16], xmm8 - movdqa [hash+6*16], xmm9 - movdqa [hash+7*16], xmm10 - -LAB_RET: -%ifidn __OUTPUT_FORMAT__,win64 - movdqa xmm6, [rsp + 16*0] - movdqa xmm7, [rsp + 16*1] - movdqa xmm8, [rsp + 16*2] - movdqa xmm9, [rsp + 16*3] - movdqa xmm10, [rsp + 16*4] - movdqa xmm11, [rsp + 16*5] - add rsp, 16 * 6 -%endif - pop rbx - ret 
- -%ifidn __OUTPUT_FORMAT__,elf -section .note.GNU-stack noalloc noexec nowrite progbits -%endif -%ifidn __OUTPUT_FORMAT__,elf64 -section .note.GNU-stack noalloc noexec nowrite progbits -%endif diff --git a/x86_64/sha256_xmm_amd64.asm b/x86_64/sha256_xmm_amd64.asm deleted file mode 100644 index 6b2ee169..00000000 --- a/x86_64/sha256_xmm_amd64.asm +++ /dev/null @@ -1,354 +0,0 @@ -;/* -; * Copyright (C) 2011 - Neil Kettle -; * -; * This file is part of cpuminer-ng. -; * -; * cpuminer-ng is free software: you can redistribute it and/or modify -; * it under the terms of the GNU General Public License as published by -; * the Free Software Foundation, either version 3 of the License, or -; * (at your option) any later version. -; * -; * cpuminer-ng is distributed in the hope that it will be useful, -; * but WITHOUT ANY WARRANTY; without even the implied warranty of -; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -; * GNU General Public License for more details. -; * -; * You should have received a copy of the GNU General Public License -; * along with cpuminer-ng. If not, see . 
-; */ - -; %rbp, %rbx, and %r12-%r15 - callee save - -ALIGN 32 -BITS 64 - -%ifidn __OUTPUT_FORMAT__,win64 -%define hash rcx -%define hash1 rdx -%define data r8 -%define init r9 -%else -%define hash rdi -%define hash1 rsi -%define data rdx -%define init rcx -%endif - -; 0 = (1024 - 256) (mod (LAB_CALC_UNROLL*LAB_CALC_PARA*16)) -%define SHA_CALC_W_PARA 2 -%define SHA_CALC_W_UNROLL 8 - -%define SHA_ROUND_LOOP_UNROLL 16 - -%ifidn __YASM_OBJFMT__, macho64 -extern _sha256_consts_m128i -extern _sha256_init -%else -extern sha256_consts_m128i -extern sha256_init -%endif - -%ifidn __YASM_OBJFMT__, macho64 -global _sha256_sse2_64_new -%else -global sha256_sse2_64_new -%endif - -%define sr1 xmm6 -%define sr2 xmm1 -%define sr3 xmm2 -%define sr4 xmm13 - -%define rA xmm7 -%define rB xmm5 -%define rC xmm4 -%define rD xmm3 -%define rE xmm0 -%define rF xmm8 -%define rG xmm9 -%define rH xmm10 - -%macro sha_round_blk 0 - movdqa sr1, [data+rax] ; T1 = w; - ;movdqa sr1, xmm11 - movdqa sr2, rE ; sr2 = rE - - pandn sr2, rG ; sr2 = ~rE & rG - movdqa sr3, rF ; sr3 = rF - - paddd sr1, rH ; T1 = h + sha256_consts_m128i[i] + w; - movdqa rH, rG ; rH = rG - - pand sr3, rE ; sr3 = rE & rF - movdqa rG, rF ; rG = rF - -%ifidn __YASM_OBJFMT__, macho64 - paddd sr1, [rcx+rax] -%else - paddd sr1, sha256_consts_m128i[rax] ; T1 = sha256_consts_m128i[i] + w; -%endif - pxor sr2, sr3 ; sr2 = (rE & rF) ^ (~rE & rG) = Ch (e, f, g) - - movdqa rF, rE ; rF = rE - paddd sr1, sr2 ; T1 = h + Ch (e, f, g) + sha256_consts_m128i[i] + w; - - movdqa sr2, rE ; sr2 = rE - psrld rE, 6 ; e >> 6 - - movdqa sr3, rE ; e >> 6 - pslld sr2, 7 ; e << 7 - - psrld sr3, 5 ; e >> 11 - pxor rE, sr2 ; e >> 6 ^ e << 7 - - pslld sr2, 14 ; e << 21 - pxor rE, sr3 ; e >> 6 ^ e << 7 ^ e >> 11 - - psrld sr3, 14 ; e >> 25 - pxor rE, sr2 ; e >> 6 ^ e << 7 ^ e >> 11 ^ e << 21 - - pslld sr2, 5 ; e << 26 - pxor rE, sr3 ; e >> 6 ^ e << 7 ^ e >> 11 ^ e << 21 ^ e >> 25 - - pxor rE, sr2 ; e >> 6 ^ e << 7 ^ e >> 11 ^ e << 21 ^ e >> 25 ^ e << 26 - movdqa 
sr2, rB ; sr2 = rB - - paddd sr1, rE ; sr1 = h + BIGSIGMA1_256(e) + Ch (e, f, g) + sha256_consts_m128i[i] + w; - movdqa rE, rD ; rE = rD - - movdqa rD, rC ; rD = rC - paddd rE, sr1 ; rE = rD + T1 - - movdqa sr3, rC ; sr3 = rC - pand rC, rA ; rC = rC & rA - - pand sr3, rB ; sr3 = rB & rC - pand sr2, rA ; sr2 = rB & rA - - pxor sr2, rC ; sr2 = (rB & rA) ^ (rC & rA) - movdqa rC, rB ; rC = rB - - pxor sr2, sr3 ; sr2 = (rB & rA) ^ (rC & rA) ^ (rB & rC) - movdqa rB, rA ; rB = rA - - paddd sr1, sr2 ; sr1 = T1 + (rB & rA) ^ (rC & rA) ^ (rB & rC) - lea rax, [rax+16] - - movdqa sr3, rA ; sr3 = rA - psrld rA, 2 ; a >> 2 - - pslld sr3, 10 ; a << 10 - movdqa sr2, rA ; a >> 2 - - pxor rA, sr3 ; a >> 2 ^ a << 10 - psrld sr2, 11 ; a >> 13 - - pxor rA, sr2 ; a >> 2 ^ a << 10 ^ a >> 13 - pslld sr3, 9 ; a << 19 - - pxor rA, sr3 ; a >> 2 ^ a << 10 ^ a >> 13 ^ a << 19 - psrld sr2, 9 ; a >> 21 - - pxor rA, sr2 ; a >> 2 ^ a << 10 ^ a >> 13 ^ a << 19 ^ a >> 21 - pslld sr3, 11 ; a << 30 - - pxor rA, sr3 ; a >> 2 ^ a << 10 ^ a >> 13 ^ a << 19 ^ a >> 21 ^ a << 30 - paddd rA, sr1 ; T1 + BIGSIGMA0_256(a) + Maj(a, b, c); -%endmacro - -%macro sha_calc_w_blk 1 - movdqa xmm0, [r11-(15-%1)*16] ; xmm0 = W[I-15] - movdqa xmm4, [r11-(15-(%1+1))*16] ; xmm4 = W[I-15+1] - movdqa xmm2, xmm0 ; xmm2 = W[I-15] - movdqa xmm6, xmm4 ; xmm6 = W[I-15+1] - psrld xmm0, 3 ; xmm0 = W[I-15] >> 3 - psrld xmm4, 3 ; xmm4 = W[I-15+1] >> 3 - movdqa xmm1, xmm0 ; xmm1 = W[I-15] >> 3 - movdqa xmm5, xmm4 ; xmm5 = W[I-15+1] >> 3 - pslld xmm2, 14 ; xmm2 = W[I-15] << 14 - pslld xmm6, 14 ; xmm6 = W[I-15+1] << 14 - psrld xmm1, 4 ; xmm1 = W[I-15] >> 7 - psrld xmm5, 4 ; xmm5 = W[I-15+1] >> 7 - pxor xmm0, xmm1 ; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) - pxor xmm4, xmm5 ; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) - psrld xmm1, 11 ; xmm1 = W[I-15] >> 18 - psrld xmm5, 11 ; xmm5 = W[I-15+1] >> 18 - pxor xmm0, xmm2 ; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) ^ (W[I-15] << 14) - pxor xmm4, xmm6 ; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) ^ 
(W[I-15+1] << 14) - pslld xmm2, 11 ; xmm2 = W[I-15] << 25 - pslld xmm6, 11 ; xmm6 = W[I-15+1] << 25 - pxor xmm0, xmm1 ; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) ^ (W[I-15] << 14) ^ (W[I-15] >> 18) - pxor xmm4, xmm5 ; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) ^ (W[I-15+1] << 14) ^ (W[I-15+1] >> 18) - pxor xmm0, xmm2 ; xmm0 = (W[I-15] >> 3) ^ (W[I-15] >> 7) ^ (W[I-15] << 14) ^ (W[I-15] >> 18) ^ (W[I-15] << 25) - pxor xmm4, xmm6 ; xmm4 = (W[I-15+1] >> 3) ^ (W[I-15+1] >> 7) ^ (W[I-15+1] << 14) ^ (W[I-15+1] >> 18) ^ (W[I-15+1] << 25) - - movdqa xmm3, [r11-(2-%1)*16] ; xmm3 = W[I-2] - movdqa xmm7, [r11-(2-(%1+1))*16] ; xmm7 = W[I-2+1] - - paddd xmm0, [r11-(16-%1)*16] ; xmm0 = s0(W[I-15]) + W[I-16] - paddd xmm4, [r11-(16-(%1+1))*16] ; xmm4 = s0(W[I-15+1]) + W[I-16+1] - -;;;;;;;;;;;;;;;;;; - - movdqa xmm2, xmm3 ; xmm2 = W[I-2] - movdqa xmm6, xmm7 ; xmm6 = W[I-2+1] - psrld xmm3, 10 ; xmm3 = W[I-2] >> 10 - psrld xmm7, 10 ; xmm7 = W[I-2+1] >> 10 - movdqa xmm1, xmm3 ; xmm1 = W[I-2] >> 10 - movdqa xmm5, xmm7 ; xmm5 = W[I-2+1] >> 10 - - paddd xmm0, [r11-(7-%1)*16] ; xmm0 = s0(W[I-15]) + W[I-16] + W[I-7] - - pslld xmm2, 13 ; xmm2 = W[I-2] << 13 - pslld xmm6, 13 ; xmm6 = W[I-2+1] << 13 - psrld xmm1, 7 ; xmm1 = W[I-2] >> 17 - psrld xmm5, 7 ; xmm5 = W[I-2+1] >> 17 - - paddd xmm4, [r11-(7-(%1+1))*16] ; xmm4 = s0(W[I-15+1]) + W[I-16+1] + W[I-7+1] - - pxor xmm3, xmm1 ; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) - pxor xmm7, xmm5 ; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) - psrld xmm1, 2 ; xmm1 = W[I-2] >> 19 - psrld xmm5, 2 ; xmm5 = W[I-2+1] >> 19 - pxor xmm3, xmm2 ; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) ^ (W[I-2] << 13) - pxor xmm7, xmm6 ; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) ^ (W[I-2+1] << 13) - pslld xmm2, 2 ; xmm2 = W[I-2] << 15 - pslld xmm6, 2 ; xmm6 = W[I-2+1] << 15 - pxor xmm3, xmm1 ; xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) ^ (W[I-2] << 13) ^ (W[I-2] >> 19) - pxor xmm7, xmm5 ; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) ^ (W[I-2+1] << 13) ^ (W[I-2+1] >> 19) - pxor xmm3, xmm2 ; 
xmm3 = (W[I-2] >> 10) ^ (W[I-2] >> 17) ^ (W[I-2] << 13) ^ (W[I-2] >> 19) ^ (W[I-2] << 15) - pxor xmm7, xmm6 ; xmm7 = (W[I-2+1] >> 10) ^ (W[I-2+1] >> 17) ^ (W[I-2+1] << 13) ^ (W[I-2+1] >> 19) ^ (W[I-2+1] << 15) - - paddd xmm0, xmm3 ; xmm0 = s0(W[I-15]) + W[I-16] + s1(W[I-2]) + W[I-7] - paddd xmm4, xmm7 ; xmm4 = s0(W[I-15+1]) + W[I-16+1] + s1(W[I-2+1]) + W[I-7+1] - movdqa [r11+(%1*16)], xmm0 - movdqa [r11+((%1+1)*16)], xmm4 -%endmacro - -; _sha256_sse2_64_new hash(rdi), hash1(rsi), data(rdx), init(rcx), - -%ifidn __YASM_OBJFMT__, macho64 -_sha256_sse2_64_new: -%else -sha256_sse2_64_new: -%endif - - push rbx -%ifidn __OUTPUT_FORMAT__,win64 - sub rsp, 16 * 6 - movdqa [rsp + 16*0], xmm6 - movdqa [rsp + 16*1], xmm7 - movdqa [rsp + 16*2], xmm8 - movdqa [rsp + 16*3], xmm9 - movdqa [rsp + 16*4], xmm10 - movdqa [rsp + 16*5], xmm13 -%endif - -%macro SHA_256 0 - mov rbx, 64*4 ; rbx is # of SHA-2 rounds - mov rax, 16*4 ; rax is where we expand to - - push rbx - lea rbx, qword [data+rbx*4] - lea r11, qword [data+rax*4] - -%%SHA_CALC_W: -%assign i 0 -%rep SHA_CALC_W_UNROLL - sha_calc_w_blk i -%assign i i+SHA_CALC_W_PARA -%endrep - add r11, SHA_CALC_W_UNROLL*SHA_CALC_W_PARA*16 - cmp r11, rbx - jb %%SHA_CALC_W - - pop rbx - mov rax, 0 - lea rbx, [rbx*4] - - movdqa rA, [init] - pshufd rB, rA, 0x55 ; rB == B - pshufd rC, rA, 0xAA ; rC == C - pshufd rD, rA, 0xFF ; rD == D - pshufd rA, rA, 0 ; rA == A - - movdqa rE, [init+4*4] - pshufd rF, rE, 0x55 ; rF == F - pshufd rG, rE, 0xAA ; rG == G - pshufd rH, rE, 0xFF ; rH == H - pshufd rE, rE, 0 ; rE == E - -%ifidn __YASM_OBJFMT__, macho64 - lea rcx, [_sha256_consts_m128i wrt rip] -%endif - -%%SHAROUND_LOOP: -%assign i 0 -%rep SHA_ROUND_LOOP_UNROLL - sha_round_blk -%assign i i+1 -%endrep - cmp rax, rbx - jb %%SHAROUND_LOOP - -; Finished the 64 rounds, calculate hash and save - - movdqa sr1, [init] - pshufd sr2, sr1, 0x55 - pshufd sr3, sr1, 0xAA - pshufd sr4, sr1, 0xFF - pshufd sr1, sr1, 0 - - paddd rB, sr2 - paddd rC, sr3 - paddd rD, sr4 - 
paddd rA, sr1 - - movdqa sr1, [init+4*4] - pshufd sr2, sr1, 0x55 - pshufd sr3, sr1, 0xAA - pshufd sr4, sr1, 0xFF - pshufd sr1, sr1, 0 - - paddd rF, sr2 - paddd rG, sr3 - paddd rH, sr4 - paddd rE, sr1 -%endmacro - - SHA_256 - movdqa [hash1+0*16], rA - movdqa [hash1+1*16], rB - movdqa [hash1+2*16], rC - movdqa [hash1+3*16], rD - movdqa [hash1+4*16], rE - movdqa [hash1+5*16], rF - movdqa [hash1+6*16], rG - movdqa [hash1+7*16], rH - - mov data, hash1 - mov init, sha256_init - - SHA_256 - - movdqa [hash+7*16], rH - -LAB_RET: -%ifidn __OUTPUT_FORMAT__,win64 - movdqa xmm6, [rsp + 16*0] - movdqa xmm7, [rsp + 16*1] - movdqa xmm8, [rsp + 16*2] - movdqa xmm9, [rsp + 16*3] - movdqa xmm10, [rsp + 16*4] - movdqa xmm13, [rsp + 16*5] - add rsp, 16 * 6 -%endif - pop rbx - ret - -%ifidn __OUTPUT_FORMAT__,elf -section .note.GNU-stack noalloc noexec nowrite progbits -%endif -%ifidn __OUTPUT_FORMAT__,elf64 -section .note.GNU-stack noalloc noexec nowrite progbits -%endif