From dde70397267ce6dc07d074087eb92625d65dba20 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Tue, 14 Jun 2011 10:32:54 +1000 Subject: [PATCH] Merge gpumining from oclmine. Unstable. --- Makefile.am | 3 +- configure.ac | 2 + cpu-miner.c | 248 ++++++++++++++++++++++++++++++++++++++++---- findnonce.c | 197 +++++++++++++++++++++++++++++++++++ findnonce.h | 23 +++++ miner.h | 2 + ocl.c | 271 ++++++++++++++++++++++++++++++++++++++++++++++++ ocl.h | 22 ++++ oclminer.cl | 284 +++++++++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 1032 insertions(+), 20 deletions(-) create mode 100644 findnonce.c create mode 100644 findnonce.h create mode 100644 ocl.c create mode 100644 ocl.h create mode 100644 oclminer.cl diff --git a/Makefile.am b/Makefile.am index 7bc13e58..82c58cac 100644 --- a/Makefile.am +++ b/Makefile.am @@ -15,10 +15,11 @@ bin_PROGRAMS = minerd minerd_SOURCES = elist.h miner.h compat.h \ cpu-miner.c util.c \ + ocl.c findnonce.c \ sha256_generic.c sha256_4way.c sha256_via.c \ sha256_cryptopp.c sha256_sse2_amd64.c minerd_LDFLAGS = $(PTHREAD_FLAGS) -minerd_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ +minerd_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @OPENCL_LIBS@ minerd_CPPFLAGS = @LIBCURL_CPPFLAGS@ if HAVE_x86_64 diff --git a/configure.ac b/configure.ac index 72b8a9f2..62c8bd7d 100644 --- a/configure.ac +++ b/configure.ac @@ -40,6 +40,7 @@ case $target in esac +AC_CHECK_LIB(OpenCL, clSetKernelArg, OPENCL_LIBS=-lOpenCL) AC_CHECK_LIB(jansson, json_loads, request_jansson=false, request_jansson=true) AC_CHECK_LIB(pthread, pthread_create, PTHREAD_LIBS=-lpthread) @@ -95,6 +96,7 @@ PKG_PROG_PKG_CONFIG() LIBCURL_CHECK_CONFIG(, 7.10.1, , [AC_MSG_ERROR([Missing required libcurl >= 7.10.1])]) +AC_SUBST(OPENCL_LIBS) AC_SUBST(JANSSON_LIBS) AC_SUBST(PTHREAD_FLAGS) AC_SUBST(PTHREAD_LIBS) diff --git a/cpu-miner.c b/cpu-miner.c index 374ffd81..e9de48d6 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -19,6 +19,7 @@ #include #include #include +#include #ifndef WIN32 #include #endif @@ -27,6 +28,8 @@ #include #include "compat.h" #include "miner.h" +#include "findnonce.h" +#include "ocl.h" #define PROGRAM_NAME "minerd" #define DEF_RPC_URL "http://127.0.0.1:8332/" @@ -108,6 +111,7 @@ static const char *algo_names[] = { bool opt_debug = false; bool opt_protocol = false; +bool opt_ndevs = false; bool want_longpoll = true; bool have_longpoll = false; bool use_syslog = false; @@ -122,6 +126,7 @@ static enum sha256_algos opt_algo = ALGO_SSE2_64; #else static enum sha256_algos opt_algo = ALGO_C; #endif +static int nDevs; static int opt_n_threads; static int num_processors; static char *rpc_url; @@ -135,7 +140,7 @@ pthread_mutex_t time_lock; static pthread_mutex_t hash_lock; static unsigned long total_hashes_done; static struct timeval total_tv_start; -static int solutions; +static int accepted, rejected; struct option_help { @@ -175,6 +180,9 @@ static struct option_help options_help[] = { { "debug", "(-D) Enable debug output (default: off)" }, + { "ndevs", + "(-n) Display number of detected GPUs" }, + { "no-longpoll", "Disable X-Long-Polling support (default: enabled)" }, @@ -223,6 +231,7 @@ static struct option options[] = { { "config", 1, NULL, 'c' }, { "debug", 0, NULL, 'D' }, { "help", 0, NULL, 'h' }, + { "ndevs", 0, NULL, 'n' }, { "no-longpoll", 0, NULL, 1003 }, { "pass", 1, NULL, 'p' }, { "protocol-dump", 0, NULL, 'P' }, @@ -237,8 +246,6 @@ static struct option options[] = { { "url", 1, NULL, 1001 }, { "user", 1, NULL, 'u' }, { "userpass", 1, NULL, 1002 }, - - { } }; struct work { @@ -248,6 +255,12 @@ struct work { unsigned char target[32]; unsigned char hash[32]; + + uint32_t output[MAXTHREADS]; + uint32_t res_nonce; + uint32_t valid; + uint32_t ready; + dev_blk_ctx blk; }; static bool jobj_binary(const json_t *obj, const char *key, @@ -335,10 +348,12 @@ static bool submit_upstream_work(CURL *curl, const struct work *work) res = json_object_get(val, "result"); if (json_is_true(res)) { - solutions++; + accepted++; applog(LOG_INFO, "PROOF OF WORK RESULT: true (yay!!!)"); - } else + } else { + rejected++; applog(LOG_INFO, "PROOF OF WORK RESULT: false (booooo)"); + } json_decref(val); @@ -493,7 +508,7 @@ static void hashmeter(int thr_id, struct timeval *diff, khashes = hashes_done / 1000.0; secs = (double)diff->tv_sec + ((double)diff->tv_usec / 1000000.0); - if (opt_n_threads > 1) { + if (opt_n_threads + nDevs > 1) { double total_mhashes, total_secs; /* Totals are updated by all threads so can race without locking */ @@ -505,13 +520,17 @@ static void hashmeter(int thr_id, struct timeval *diff, pthread_mutex_unlock(&hash_lock); total_secs = (double)total_diff.tv_sec + ((double)total_diff.tv_usec / 1000000.0); - applog(LOG_INFO, "[Total: %.2f Mhash/sec] " - "[thread %d: %lu hashes, %.0f khash/sec] [Solved: %d]", - total_mhashes / total_secs, thr_id, hashes_done, - khashes / secs, solutions); + if (opt_debug) + applog(LOG_DEBUG, "[thread %d: %lu hashes, %.0f khash/sec]", + thr_id, hashes_done); + if (!thr_id) + applog(LOG_INFO, "[%.2f Mhash/sec] [%d Accepted] [%d Rejected]", + total_mhashes / total_secs, accepted, rejected); } else { - applog(LOG_INFO, "[%lu hashes, %.0f khash/sec] [Solved: %d]", - hashes_done, khashes / secs, solutions); + if (opt_debug) + applog(LOG_DEBUG, "[%lu hashes]", hashes_done); + applog(LOG_INFO, "%.0f khash/sec] [%d Accepted] [%d Rejected]", + khashes / secs, accepted, rejected); } } @@ -574,6 +593,15 @@ err_out: return false; } +bool submit_nonce(struct thr_info *thr, struct work *work, uint32_t nonce) +{ + work->data[64+12+0] = (nonce>>0) & 0xff; + work->data[64+12+1] = (nonce>>8) & 0xff; + work->data[64+12+2] = (nonce>>16) & 0xff; + work->data[64+12+3] = (nonce>>24) & 0xff; + return submit_work(thr, work); +} + static void *miner_thread(void *userdata) { struct thr_info *mythr = userdata; @@ -693,11 +721,162 @@ out: return NULL; } +enum { + STAT_SLEEP_INTERVAL = 1, + STAT_CTR_INTERVAL = 10000000, + FAILURE_INTERVAL = 30, +}; + +static int block = 0; +static _clState *clStates[16]; + +static void *gpuminer_thread(void *userdata) +{ + struct thr_info *mythr = userdata; + int thr_id = mythr->id; + int failures = 0; + + uint32_t res[MAXTHREADS]; + + setpriority(PRIO_PROCESS, 0, 19); + drop_policy(); + + size_t globalThreads[1]; + size_t localThreads[1]; + + cl_int status; + + _clState *clState = clStates[thr_id]; + + status = clSetKernelArg(clState->kernel, 0, sizeof(cl_mem), (void *)&clState->inputBuffer); + if(status != CL_SUCCESS) { printf("Error: Setting kernel argument 1.\n"); return false; } + + status = clSetKernelArg(clState->kernel, 1, sizeof(cl_mem), (void *)&clState->outputBuffer); + if(status != CL_SUCCESS) { printf("Error: Setting kernel argument 2.\n"); return false; } + + struct work *work; + work = malloc(sizeof(struct work)*2); + + work[0].ready = 0; + work[1].ready = 0; + + int frame = 0; + int res_frame = 0; + int my_block = block; + bool need_work = true; + unsigned long hashes_done; + hashes_done = 0; + + unsigned int h0count = 0; + + while (1) { + struct timeval tv_start, tv_end, diff; + int threads; + bool rc; + + gettimeofday(&tv_start, NULL); + + if (need_work || my_block != block) { + frame++; + frame %= 2; + + if (opt_debug) + fprintf(stderr, "getwork\n"); + + /* obtain new work from internal workio thread */ + if (unlikely(!get_work(mythr, work + frame))) { + applog(LOG_ERR, "work retrieval failed, exiting " + "gpu mining thread %d", mythr->id); + goto out; + } + + precalc_hash(&work[frame].blk, (uint32_t *)(work[frame].midstate), (uint32_t *)(work[frame].data + 64)); + + work[frame].blk.nonce = 0; + work[frame].valid = true; + work[frame].ready = 0; + + my_block = block; + need_work = false; + } + + threads = 102400 * 4; + globalThreads[0] = threads; + localThreads[0] = 128; + + status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_TRUE, 0, + sizeof(dev_blk_ctx), (void *)&work[frame].blk, 0, NULL, NULL); + if(status != CL_SUCCESS) { printf("Error: clEnqueueWriteBuffer failed.\n"); goto out; } + + clFinish(clState->commandQueue); + + status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, NULL, + globalThreads, localThreads, 0, NULL, NULL); + if (status != CL_SUCCESS) { printf("Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)\n"); goto out; } + + clFlush(clState->commandQueue); + + hashes_done = 1024 * threads; + + if (work[res_frame].ready) { + rc = false; + + uint32_t bestG = ~0; + uint32_t nonce; + int j; + for(j = 0; j < work[res_frame].ready; j++) { + if(res[j]) { + uint32_t start = (work[res_frame].res_nonce + j)<<10; + uint32_t my_g, my_nonce; + my_g = postcalc_hash(mythr, &work[res_frame].blk, &work[res_frame], start, start + 1026, &my_nonce, &h0count); + + rc = true; + } + } + + work[res_frame].ready = false; + + uint32_t *target = (uint32_t *)(work[res_frame].target + 24); + } + + gettimeofday(&tv_end, NULL); + timeval_subtract(&diff, &tv_end, &tv_start); + + hashmeter(thr_id, &diff, hashes_done); + + /* adjust max_nonce to meet target scan time */ + if (diff.tv_usec > 500000) + diff.tv_sec++; + if (diff.tv_sec > 0) + applog(LOG_INFO, "Not reaching opt_scantime by %d", diff.tv_sec); + + status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0, + sizeof(uint32_t) * threads, res, 0, NULL, NULL); + if (status != CL_SUCCESS) { printf("Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)\n"); goto out;} + + res_frame = frame; + work[res_frame].ready = threads; + work[res_frame].res_nonce = work[res_frame].blk.nonce; + + work[frame].blk.nonce += threads; + + if (unlikely(work[frame].blk.nonce > 4000000 - threads)) + need_work = true; + + failures = 0; + } + +out: + tq_freeze(mythr->q); + + return NULL; +} + static void restart_threads(void) { int i; - for (i = 0; i < opt_n_threads; i++) + for (i = 0; i < opt_n_threads + nDevs; i++) work_restart[i].restart = 1; } @@ -948,6 +1127,13 @@ int main (int argc, char *argv[]) { struct thr_info *thr; int i; + char name[32]; + + nDevs = clDevicesNum(); + if (opt_ndevs) { + printf("%i\n", nDevs); + return nDevs; + } rpc_url = strdup(DEF_RPC_URL); @@ -975,16 +1161,16 @@ int main (int argc, char *argv[]) openlog("cpuminer", LOG_PID, LOG_USER); #endif - work_restart = calloc(opt_n_threads, sizeof(*work_restart)); + work_restart = calloc(opt_n_threads + nDevs, sizeof(*work_restart)); if (!work_restart) return 1; - thr_info = calloc(opt_n_threads + 2, sizeof(*thr)); + thr_info = calloc(opt_n_threads + 2 + nDevs, sizeof(*thr)); if (!thr_info) return 1; /* init workio thread info */ - work_thr_id = opt_n_threads; + work_thr_id = opt_n_threads + nDevs; thr = &thr_info[work_thr_id]; thr->id = work_thr_id; thr->q = tq_new(); @@ -999,7 +1185,7 @@ int main (int argc, char *argv[]) /* init longpoll thread info */ if (want_longpoll) { - longpoll_thr_id = opt_n_threads + 1; + longpoll_thr_id = opt_n_threads + nDevs + 1; thr = &thr_info[longpoll_thr_id]; thr->id = longpoll_thr_id; thr->q = tq_new(); @@ -1015,8 +1201,32 @@ int main (int argc, char *argv[]) longpoll_thr_id = -1; gettimeofday(&total_tv_start, NULL); + + /* start gpu mining threads */ + for (i = 0; i < nDevs; i++) { + thr = &thr_info[i]; + + thr->id = i; + thr->q = tq_new(); + if (!thr->q) + return 1; + + printf("Init GPU %i\n", i); + clStates[i] = initCl(i, name, sizeof(name)); + printf("initCl() finished. Found %s\n", name); + + if (unlikely(pthread_create(&thr->pth, NULL, gpuminer_thread, thr))) { + applog(LOG_ERR, "thread %d create failed", i); + return 1; + } + + sleep(1); /* don't pound RPC server all at once */ + } + + fprintf(stderr, "%d gpu miner threads started\n", i); + /* start mining threads */ - for (i = 0; i < opt_n_threads; i++) { + for (i = nDevs; i < nDevs + opt_n_threads; i++) { thr = &thr_info[i]; thr->id = i; @@ -1032,7 +1242,7 @@ int main (int argc, char *argv[]) sleep(1); /* don't pound RPC server all at once */ } - applog(LOG_INFO, "%d miner threads started, " + applog(LOG_INFO, "%d cpu miner threads started, " "using SHA256 '%s' algorithm.", opt_n_threads, algo_names[opt_algo]); diff --git a/findnonce.c b/findnonce.c new file mode 100644 index 00000000..58c89e86 --- /dev/null +++ b/findnonce.c @@ -0,0 +1,197 @@ +/* + * Copyright 2011 Nils Schneider + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. See COPYING for more details. + */ + +#include +#include + +#include "ocl.h" +#include "findnonce.h" + +const uint32_t SHA256_K[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +inline uint32_t ByteReverse(uint32_t value) +{ + __asm__ ("bswap %0" : "=r" (value) : "0" (value)); + return value; +} + +#define rotate(x,y) ((x<>(sizeof(x)*8-y))) +#define rotr(x,y) ((x>>y) | (x<<(sizeof(x)*8-y))) + +#define R(a, b, c, d, e, f, g, h, w, k) \ + h = h + (rotate(e, 26) ^ rotate(e, 21) ^ rotate(e, 7)) + (g ^ (e & (f ^ g))) + k + w; \ + d = d + h; \ + h = h + (rotate(a, 30) ^ rotate(a, 19) ^ rotate(a, 10)) + ((a & b) | (c & (a | b))) + +void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) { + cl_uint A, B, C, D, E, F, G, H; + + A = state[0]; + B = state[1]; + C = state[2]; + D = state[3]; + E = state[4]; + F = state[5]; + G = state[6]; + H = state[7]; + + R(A, B, C, D, E, F, G, H, data[0], SHA256_K[0]); + R(H, A, B, C, D, E, F, G, data[1], SHA256_K[1]); + R(G, H, A, B, C, D, E, F, data[2], SHA256_K[2]); + + blk->cty_a = A; + blk->cty_b = B; + blk->cty_c = C; + blk->cty_d = D; + blk->cty_e = E; + blk->cty_f = F; + blk->cty_g = G; + blk->cty_h = H; + + blk->ctx_a = state[0]; + blk->ctx_b = state[1]; + blk->ctx_c = state[2]; + blk->ctx_d = state[3]; + blk->ctx_e = state[4]; + blk->ctx_f = state[5]; + blk->ctx_g = state[6]; + blk->ctx_h = state[7]; + + blk->merkle = data[0]; + blk->ntime = data[1]; + blk->nbits = data[2]; + + blk->fW0 = data[0] + (rotr(data[1], 7) ^ rotr(data[1], 18) ^ (data[1] >> 3)); + blk->fW1 = data[1] + (rotr(data[2], 7) ^ rotr(data[2], 18) ^ (data[2] >> 3)) + 0x01100000; + blk->fW2 = data[2] + (rotr(blk->fW0, 17) ^ rotr(blk->fW0, 19) ^ (blk->fW0 >> 10)); + blk->fW3 = 0x11002000 + (rotr(blk->fW1, 17) ^ rotr(blk->fW1, 19) ^ (blk->fW1 >> 10)); + blk->fW15 = 0x00000280 + (rotr(blk->fW0, 7) ^ rotr(blk->fW0, 18) ^ (blk->fW0 >> 3)); + blk->fW01r = blk->fW0 + (rotr(blk->fW1, 7) ^ rotr(blk->fW1, 18) ^ (blk->fW1 >> 3)); + + blk->fcty_e = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + 0xe9b5dba5; + blk->fcty_e2 = (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G))); +} + +#define P(t) (W[(t)&0xF] = W[(t-16)&0xF] + (rotate(W[(t-15)&0xF], 25) ^ rotate(W[(t-15)&0xF], 14) ^ (W[(t-15)&0xF] >> 3)) + W[(t-7)&0xF] + (rotate(W[(t-2)&0xF], 15) ^ rotate(W[(t-2)&0xF], 13) ^ (W[(t-2)&0xF] >> 10))) + +#define IR(u) \ + R(A, B, C, D, E, F, G, H, W[u+0], SHA256_K[u+0]); \ + R(H, A, B, C, D, E, F, G, W[u+1], SHA256_K[u+1]); \ + R(G, H, A, B, C, D, E, F, W[u+2], SHA256_K[u+2]); \ + R(F, G, H, A, B, C, D, E, W[u+3], SHA256_K[u+3]); \ + R(E, F, G, H, A, B, C, D, W[u+4], SHA256_K[u+4]); \ + R(D, E, F, G, H, A, B, C, W[u+5], SHA256_K[u+5]); \ + R(C, D, E, F, G, H, A, B, W[u+6], SHA256_K[u+6]); \ + R(B, C, D, E, F, G, H, A, W[u+7], SHA256_K[u+7]) +#define FR(u) \ + R(A, B, C, D, E, F, G, H, P(u+0), SHA256_K[u+0]); \ + R(H, A, B, C, D, E, F, G, P(u+1), SHA256_K[u+1]); \ + R(G, H, A, B, C, D, E, F, P(u+2), SHA256_K[u+2]); \ + R(F, G, H, A, B, C, D, E, P(u+3), SHA256_K[u+3]); \ + R(E, F, G, H, A, B, C, D, P(u+4), SHA256_K[u+4]); \ + R(D, E, F, G, H, A, B, C, P(u+5), SHA256_K[u+5]); \ + R(C, D, E, F, G, H, A, B, P(u+6), SHA256_K[u+6]); \ + R(B, C, D, E, F, G, H, A, P(u+7), SHA256_K[u+7]) + +#define PIR(u) \ + R(F, G, H, A, B, C, D, E, W[u+3], SHA256_K[u+3]); \ + R(E, F, G, H, A, B, C, D, W[u+4], SHA256_K[u+4]); \ + R(D, E, F, G, H, A, B, C, W[u+5], SHA256_K[u+5]); \ + R(C, D, E, F, G, H, A, B, W[u+6], SHA256_K[u+6]); \ + R(B, C, D, E, F, G, H, A, W[u+7], SHA256_K[u+7]) + +#define PFR(u) \ + R(A, B, C, D, E, F, G, H, P(u+0), SHA256_K[u+0]); \ + R(H, A, B, C, D, E, F, G, P(u+1), SHA256_K[u+1]); \ + R(G, H, A, B, C, D, E, F, P(u+2), SHA256_K[u+2]); \ + R(F, G, H, A, B, C, D, E, P(u+3), SHA256_K[u+3]); \ + R(E, F, G, H, A, B, C, D, P(u+4), SHA256_K[u+4]); \ + R(D, E, F, G, H, A, B, C, P(u+5), SHA256_K[u+5]) + +uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk, + struct work *work, uint32_t start, uint32_t end, + uint32_t *best_nonce, unsigned int *h0count) +{ + cl_uint A, B, C, D, E, F, G, H; + cl_uint W[16]; + cl_uint nonce; + cl_uint best_g = ~0; + + work_restart[thr->id].restart = 0; + for (nonce = start; nonce != end; nonce+=1) { + A = blk->cty_a; B = blk->cty_b; + C = blk->cty_c; D = blk->cty_d; + E = blk->cty_e; F = blk->cty_f; + G = blk->cty_g; H = blk->cty_h; + W[0] = blk->merkle; W[1] = blk->ntime; + W[2] = blk->nbits; W[3] = nonce;; + W[4] = 0x80000000; W[5] = 0x00000000; W[6] = 0x00000000; W[7] = 0x00000000; + W[8] = 0x00000000; W[9] = 0x00000000; W[10] = 0x00000000; W[11] = 0x00000000; + W[12] = 0x00000000; W[13] = 0x00000000; W[14] = 0x00000000; W[15] = 0x00000280; + PIR(0); IR(8); + FR(16); FR(24); + FR(32); FR(40); + FR(48); FR(56); + + W[0] = A + blk->ctx_a; W[1] = B + blk->ctx_b; + W[2] = C + blk->ctx_c; W[3] = D + blk->ctx_d; + W[4] = E + blk->ctx_e; W[5] = F + blk->ctx_f; + W[6] = G + blk->ctx_g; W[7] = H + blk->ctx_h; + W[8] = 0x80000000; W[9] = 0x00000000; W[10] = 0x00000000; W[11] = 0x00000000; + W[12] = 0x00000000; W[13] = 0x00000000; W[14] = 0x00000000; W[15] = 0x00000100; + A = 0x6a09e667; B = 0xbb67ae85; + C = 0x3c6ef372; D = 0xa54ff53a; + E = 0x510e527f; F = 0x9b05688c; + G = 0x1f83d9ab; H = 0x5be0cd19; + IR(0); IR(8); + FR(16); FR(24); + FR(32); FR(40); + FR(48); PFR(56); + + if (unlikely(H == 0xA41F32E7)) { + (*h0count)++; + + if (unlikely(submit_nonce(thr, work, nonce) == false)) { + applog(LOG_ERR, "Failed to submit work, exiting"); + goto out; + } + + G += 0x1f83d9ab; + G = ByteReverse(G); + + if (G < best_g) { + *best_nonce = nonce; + best_g = G; + } + } + if (work_restart[thr->id].restart) + break; + } +out: + if (best_g == ~0) printf("No best_g found! Error in OpenCL code?\n"); + + return best_g; +} diff --git a/findnonce.h b/findnonce.h new file mode 100644 index 00000000..007b5cc4 --- /dev/null +++ b/findnonce.h @@ -0,0 +1,23 @@ +#define MAXTHREADS 2000000 + +#ifdef __APPLE_CC__ +#include +#else +#include +#endif +#include "miner.h" + +typedef struct { + cl_uint ctx_a; cl_uint ctx_b; cl_uint ctx_c; cl_uint ctx_d; + cl_uint ctx_e; cl_uint ctx_f; cl_uint ctx_g; cl_uint ctx_h; + cl_uint cty_a; cl_uint cty_b; cl_uint cty_c; cl_uint cty_d; + cl_uint cty_e; cl_uint cty_f; cl_uint cty_g; cl_uint cty_h; + cl_uint merkle; cl_uint ntime; cl_uint nbits; cl_uint nonce; + cl_uint fW0; cl_uint fW1; cl_uint fW2; cl_uint fW3; cl_uint fW15; + cl_uint fW01r; cl_uint fcty_e; cl_uint fcty_e2; +} dev_blk_ctx; + +extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data); +extern uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk, + struct work *work, uint32_t start, uint32_t end, + uint32_t *best_nonce, unsigned int *h0count); diff --git a/miner.h b/miner.h index e72404f2..1b7ab1d5 100644 --- a/miner.h +++ b/miner.h @@ -194,6 +194,8 @@ extern bool use_syslog; extern struct thr_info *thr_info; extern int longpoll_thr_id; extern struct work_restart *work_restart; +struct work; +bool submit_nonce(struct thr_info *thr, struct work *work, uint32_t nonce); extern void applog(int prio, const char *fmt, ...); extern struct thread_q *tq_new(void); diff --git a/ocl.c b/ocl.c new file mode 100644 index 00000000..4f7826cd --- /dev/null +++ b/ocl.c @@ -0,0 +1,271 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "findnonce.h" +#include "ocl.h" + +char *file_contents(const char *filename, int *length) +{ + FILE *f = fopen(filename, "r"); + void *buffer; + + if (!f) { + fprintf(stderr, "Unable to open %s for reading\n", filename); + return NULL; + } + + fseek(f, 0, SEEK_END); + *length = ftell(f); + fseek(f, 0, SEEK_SET); + + buffer = malloc(*length+1); + *length = fread(buffer, 1, *length, f); + fclose(f); + ((char*)buffer)[*length] = '\0'; + + return (char*)buffer; +} + +int clDevicesNum() { + cl_int status = 0; + + cl_uint numPlatforms; + cl_platform_id platform = NULL; + status = clGetPlatformIDs(0, NULL, &numPlatforms); + if(status != CL_SUCCESS) + { + printf("Error: Getting Platforms. (clGetPlatformsIDs)\n"); + return -1; + } + + if(numPlatforms > 0) + { + cl_platform_id* platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id)); + status = clGetPlatformIDs(numPlatforms, platforms, NULL); + if(status != CL_SUCCESS) + { + printf("Error: Getting Platform Ids. (clGetPlatformsIDs)\n"); + return -1; + } + + unsigned int i; + for(i=0; i < numPlatforms; ++i) + { + char pbuff[100]; + status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL); + if(status != CL_SUCCESS) + { + printf("Error: Getting Platform Info. (clGetPlatformInfo)\n"); + free(platforms); + return -1; + } + platform = platforms[i]; + if(!strcmp(pbuff, "Advanced Micro Devices, Inc.")) + { + break; + } + } + free(platforms); + } + + if(platform == NULL) { + perror("NULL platform found!\n"); + return -1; + } + + cl_uint numDevices; + status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices); + if(status != CL_SUCCESS) + { + printf("Error: Getting Device IDs (num)\n"); + return -1; + } + + return numDevices; +} + +_clState *initCl(int gpu, char *name, size_t nameSize) { + cl_int status = 0; + + _clState *clState = malloc(sizeof(_clState));; + + cl_uint numPlatforms; + cl_platform_id platform = NULL; + status = clGetPlatformIDs(0, NULL, &numPlatforms); + if(status != CL_SUCCESS) + { + printf("Error: Getting Platforms. (clGetPlatformsIDs)\n"); + return NULL; + } + + if(numPlatforms > 0) + { + cl_platform_id* platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id)); + status = clGetPlatformIDs(numPlatforms, platforms, NULL); + if(status != CL_SUCCESS) + { + printf("Error: Getting Platform Ids. (clGetPlatformsIDs)\n"); + return NULL; + } + + unsigned int i; + for(i=0; i < numPlatforms; ++i) + { + char pbuff[100]; + status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL); + if(status != CL_SUCCESS) + { + printf("Error: Getting Platform Info. (clGetPlatformInfo)\n"); + free(platforms); + return NULL; + } + platform = platforms[i]; + if(!strcmp(pbuff, "Advanced Micro Devices, Inc.")) + { + break; + } + } + free(platforms); + } + + if(platform == NULL) { + perror("NULL platform found!\n"); + return NULL; + } + + cl_uint numDevices; + status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices); + if(status != CL_SUCCESS) + { + printf("Error: Getting Device IDs (num)\n"); + return NULL; + } + + cl_device_id *devices; + if(numDevices > 0 ) { + devices = (cl_device_id *)malloc(numDevices*sizeof(cl_device_id)); + + /* Now, get the device list data */ + + status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL); + if(status != CL_SUCCESS) + { + printf("Error: Getting Device IDs (list)\n"); + return NULL; + } + + printf("List of devices:\n"); + + int i; + for(i=0; i= 0 && gpu < numDevices) { + char pbuff[100]; + status = clGetDeviceInfo(devices[gpu], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL); + if(status != CL_SUCCESS) + { + printf("Error: Getting Device Info\n"); + return NULL; + } + + printf("Selected %i: %s\n", gpu, pbuff); + strncpy(name, pbuff, nameSize); + } else { + printf("Invalid GPU %i\n", gpu); + return NULL; + } + + } else return NULL; + + cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; + + clState->context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status); + if(status != CL_SUCCESS) + { + printf("Error: Creating Context. (clCreateContextFromType)\n"); + return NULL; + } + + + ///////////////////////////////////////////////////////////////// + // Load CL file, build CL program object, create CL kernel object + ///////////////////////////////////////////////////////////////// + // + const char * filename = "oclminer.cl"; + int pl; + char *source = file_contents(filename, &pl); + size_t sourceSize[] = {(size_t)pl}; + + clState->program = clCreateProgramWithSource(clState->context, 1, (const char **)&source, sourceSize, &status); + if(status != CL_SUCCESS) + { + printf("Error: Loading Binary into cl_program (clCreateProgramWithBinary)\n"); + return NULL; + } + + /* create a cl program executable for all the devices specified */ + status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL); + if(status != CL_SUCCESS) + { + printf("Error: Building Program (clBuildProgram)\n"); + size_t logSize; + status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize); + + char *log = malloc(logSize); + status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, logSize, log, NULL); + printf("%s\n", log); + return NULL; + } + + /* get a kernel object handle for a kernel with the given name */ + clState->kernel = clCreateKernel(clState->program, "oclminer", &status); + if(status != CL_SUCCESS) + { + printf("Error: Creating Kernel from program. (clCreateKernel)\n"); + return NULL; + } + + ///////////////////////////////////////////////////////////////// + // Create an OpenCL command queue + ///////////////////////////////////////////////////////////////// + clState->commandQueue = clCreateCommandQueue( clState->context, devices[gpu], 0, &status); + if(status != CL_SUCCESS) + { + printf("Creating Command Queue. (clCreateCommandQueue)\n"); + return NULL; + } + + clState->inputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, sizeof(dev_blk_ctx), NULL, &status); + if(status != CL_SUCCESS) { + printf("Error: clCreateBuffer (inputBuffer)\n"); + return NULL; + } + + clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, sizeof(uint32_t) * MAXTHREADS, NULL, &status); + if(status != CL_SUCCESS) { + printf("Error: clCreateBuffer (outputBuffer)\n"); + return NULL; + } + + return clState; +} + diff --git a/ocl.h b/ocl.h new file mode 100644 index 00000000..68c16702 --- /dev/null +++ b/ocl.h @@ -0,0 +1,22 @@ +#ifndef __OCL_H__ +#define __OCL_H__ +#ifdef __APPLE_CC__ +#include +#else +#include +#endif + +typedef struct { + cl_context context; + cl_kernel kernel; + cl_command_queue commandQueue; + cl_program program; + cl_mem inputBuffer; + cl_mem outputBuffer; +} _clState; + +extern char *file_contents(const char *filename, int *length); +extern int clDevicesNum(); +extern _clState *initCl(int gpu, char *name, size_t nameSize); + +#endif /* __OCL_H__ */ diff --git a/oclminer.cl b/oclminer.cl new file mode 100644 index 00000000..1cd6f57d --- /dev/null +++ b/oclminer.cl @@ -0,0 +1,284 @@ +#define rotr(x, n) rotate(x, (uint)(32 - n)) + +#define WGS __attribute__((reqd_work_group_size(128, 1, 1))) + +__constant uint K[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +typedef struct { + uint ctx_a; uint ctx_b; uint ctx_c; uint ctx_d; + uint ctx_e; uint ctx_f; uint ctx_g; uint ctx_h; + uint cty_a; uint cty_b; uint cty_c; uint cty_d; + uint cty_e; uint cty_f; uint cty_g; uint cty_h; + uint merkle; uint ntime; uint nbits; uint nonce; + uint fW0; uint fW1; uint fW2; uint fW3; uint fW15; + uint fW01r; uint fcty_e; uint fcty_e2; +} dev_blk_ctx; + +__kernel __attribute__((vec_type_hint(uint))) WGS void oclminer( + __constant dev_blk_ctx *ctx, __global uint *output) +{ + const uint fW0 = ctx->fW0; + const uint fW1 = ctx->fW1; + const uint fW2 = ctx->fW2; + const uint fW3 = ctx->fW3; + const uint fW15 = ctx->fW15; + const uint fW01r = ctx->fW01r; + const uint fcty_e = ctx->fcty_e; + const uint fcty_e2 = ctx->fcty_e2; + const uint state0 = ctx->ctx_a; + const uint state1 = ctx->ctx_b; + const uint state2 = ctx->ctx_c; + const uint state3 = ctx->ctx_d; + const uint state4 = ctx->ctx_e; + const uint state5 = ctx->ctx_f; + const uint state6 = ctx->ctx_g; + const uint state7 = ctx->ctx_h; + const uint B1 = ctx->cty_b; + const uint C1 = ctx->cty_c; + const uint D1 = ctx->cty_d; + const uint F1 = ctx->cty_f; + const uint G1 = ctx->cty_g; + const uint H1 = ctx->cty_h; + + uint A, B, C, D, E, F, G, H; + uint W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15; + uint it, res = 0; + const uint myid = get_global_id(0); + + const uint tnonce = (ctx->nonce + myid)<<10; + + for(it = 0; it != 1024; it++) { + W3 = it ^ tnonce; + E = fcty_e + W3; A = state0 + E; E = E + fcty_e2; + D = D1 + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C1 ^ (A & (B1 ^ C1))) + K[ 4] + 0x80000000; H = H1 + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F1) | (G1 & (E | F1))); + C = C1 + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B1 ^ (H & (A ^ B1))) + K[ 5]; G = G1 + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F1 & (D | E))); + B = B1 + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[ 6]; F = F1 + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D))); + A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[ 7]; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C))); + H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[ 8]; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B))); + G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[ 9]; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A))); + F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[10]; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H))); + E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[11]; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G))); + D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[12]; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F))); + C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[13]; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E))); + B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[14]; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D))); + A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[15] + 0x00000280; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C))); + H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[16] + fW0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B))); + G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[17] + fW1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A))); + W2 = (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3)) + fW2; + F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[18] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H))); + W3 = W3 + fW3; + E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[19] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G))); + W4 = (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10)) + 0x80000000; + D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[20] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F))); + W5 = (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10)); + C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[21] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E))); + W6 = (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10)) + 0x00000280; + B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[22] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D))); + W7 = (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10)) + fW0; + A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[23] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C))); + W8 = (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10)) + fW1; + H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[24] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B))); + W9 = W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10)); + G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[25] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A))); + W10 = W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10)); + F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[26] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H))); + W11 = W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10)); + E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[27] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G))); + W12 = W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10)); + D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[28] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F))); + W13 = W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10)); + C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[29] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E))); + W14 = 0x00a00055 + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10)); + B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[30] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D))); + W15 = fW15 + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10)); + A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[31] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C))); + W0 = fW01r + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10)); + H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[32] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B))); + W1 = fW1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10)); + G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[33] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A))); + W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10)); + F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[34] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H))); + W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10)); + E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[35] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G))); + W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10)); + D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[36] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F))); + W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10)); + C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[37] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E))); + W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10)); + B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[38] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D))); + W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10)); + A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[39] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C))); + W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10)); + H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[40] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B))); + W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10)); + G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[41] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A))); + W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10)); + F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[42] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H))); + W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10)); + E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[43] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G))); + W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10)); + D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[44] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F))); + W13 = W13 + (rotr(W14, 7) ^ rotr(W14, 18) ^ (W14 >> 3)) + W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10)); + C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[45] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E))); + W14 = W14 + (rotr(W15, 7) ^ rotr(W15, 18) ^ (W15 >> 3)) + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10)); + B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[46] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D))); + W15 = W15 + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10)); + A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[47] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C))); + W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3)) + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10)); + H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[48] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B))); + W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10)); + G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[49] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A))); + W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10)); + F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[50] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H))); + W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10)); + E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[51] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G))); + W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10)); + D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[52] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F))); + W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10)); + C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[53] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E))); + W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10)); + B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[54] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D))); + W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10)); + A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[55] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C))); + W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10)); + H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[56] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B))); + W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10)); + G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[57] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A))); + W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10)); + F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[58] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H))); + W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10)); + E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[59] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G))); + W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10)); + D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[60] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F))); + W13 = W13 + (rotr(W14, 7) ^ rotr(W14, 18) ^ (W14 >> 3)) + W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10)); + C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[61] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E))); + W14 = W14 + (rotr(W15, 7) ^ rotr(W15, 18) ^ (W15 >> 3)) + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10)); + B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[62] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D))); + W15 = W15 + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10)); + A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[63] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C))); + + W0 = A + state0; W1 = B + state1; + W2 = C + state2; W3 = D + state3; + W4 = E + state4; W5 = F + state5; + W6 = G + state6; W7 = H + state7; + H = 0xb0edbdd0 + K[ 0] + W0; D = 0xa54ff53a + H; H = H + 0x08909ae5; + G = 0x1f83d9ab + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (0x9b05688c ^ (D & 0xca0b3af3)) + K[ 1] + W1; C = 0x3c6ef372 + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & 0x6a09e667) | (0xbb67ae85 & (H | 0x6a09e667))); + F = 0x9b05688c + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (0x510e527f ^ (C & (D ^ 0x510e527f))) + K[ 2] + W2; B = 0xbb67ae85 + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (0x6a09e667 & (G | H))); + E = 0x510e527f + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[ 3] + W3; A = 0x6a09e667 + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G))); + D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[ 4] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F))); + C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[ 5] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E))); + B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[ 6] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D))); + A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[ 7] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C))); + H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[ 8] + 0x80000000; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B))); + G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[ 9]; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A))); + F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[10]; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H))); + E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[11]; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G))); + D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[12]; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F))); + C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[13]; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E))); + B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[14]; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D))); + A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[15] + 0x00000100; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C))); + W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3)); + H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[16] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B))); + W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3)) + 0x00a00000; + G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[17] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A))); + W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3)) + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10)); + F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[18] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H))); + W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3)) + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10)); + E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[19] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G))); + W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3)) + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10)); + D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[20] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F))); + W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3)) + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10)); + C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[21] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E))); + W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3)) + 0x00000100 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10)); + B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[22] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D))); + W7 = W7 + 0x11002000 + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10)); + A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[23] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C))); + W8 = 0x80000000 + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10)); + H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[24] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B))); + W9 = W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10)); + G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[25] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A))); + W10 = W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10)); + F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[26] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H))); + W11 = W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10)); + E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[27] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G))); + W12 = W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10)); + D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[28] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F))); + W13 = W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10)); + C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[29] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E))); + W14 = 0x00400022 + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10)); + B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[30] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D))); + W15 = 0x00000100 + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10)); + A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[31] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C))); + W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3)) + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10)); + H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[32] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B))); + W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10)); + G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[33] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A))); + W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10)); + F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[34] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H))); + W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10)); + E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[35] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G))); + W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10)); + D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[36] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F))); + W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10)); + C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[37] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E))); + W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10)); + B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[38] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D))); + W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10)); + A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[39] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C))); + W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10)); + H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[40] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B))); + W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10)); + G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[41] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A))); + W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10)); + F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[42] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H))); + W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10)); + E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[43] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G))); + W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10)); + D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[44] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F))); + W13 = W13 + (rotr(W14, 7) ^ rotr(W14, 18) ^ (W14 >> 3)) + W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10)); + C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[45] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E))); + W14 = W14 + (rotr(W15, 7) ^ rotr(W15, 18) ^ (W15 >> 3)) + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10)); + B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[46] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D))); + W15 = W15 + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10)); + A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[47] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C))); + W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3)) + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10)); + H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[48] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B))); + W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10)); + G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[49] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A))); + W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10)); + F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[50] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H))); + W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10)); + E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[51] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G))); + W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10)); + D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[52] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F))); + W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10)); + C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[53] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E))); + W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10)); + B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[54] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D))); + W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10)); + A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[55] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C))); + W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10)); + H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[56] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B))); + W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10)); + G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[57] + W9; C = C + G; + W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10)); + F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[58] + W10; B = B + F; + W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10)); + E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[59] + W11; A = A + E; + W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10)); + D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[60] + W12; H = H + D; + + res |= (H==0xa41f32e7); + } + + output[myid] = res; +}