1
0
mirror of https://github.com/GOSTSec/sgminer synced 2025-01-10 14:58:01 +00:00

Merge gpumining from oclmine. Unstable.

This commit is contained in:
Con Kolivas 2011-06-14 10:32:54 +10:00
parent 51817422fc
commit dde7039726
9 changed files with 1032 additions and 20 deletions

View File

@ -15,10 +15,11 @@ bin_PROGRAMS = minerd
minerd_SOURCES = elist.h miner.h compat.h \
cpu-miner.c util.c \
ocl.c findnonce.c \
sha256_generic.c sha256_4way.c sha256_via.c \
sha256_cryptopp.c sha256_sse2_amd64.c
minerd_LDFLAGS = $(PTHREAD_FLAGS)
minerd_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@
minerd_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @OPENCL_LIBS@
minerd_CPPFLAGS = @LIBCURL_CPPFLAGS@
if HAVE_x86_64

View File

@ -40,6 +40,7 @@ case $target in
esac
AC_CHECK_LIB(OpenCL, clSetKernelArg, OPENCL_LIBS=-lOpenCL)
AC_CHECK_LIB(jansson, json_loads, request_jansson=false, request_jansson=true)
AC_CHECK_LIB(pthread, pthread_create, PTHREAD_LIBS=-lpthread)
@ -95,6 +96,7 @@ PKG_PROG_PKG_CONFIG()
LIBCURL_CHECK_CONFIG(, 7.10.1, ,
[AC_MSG_ERROR([Missing required libcurl >= 7.10.1])])
AC_SUBST(OPENCL_LIBS)
AC_SUBST(JANSSON_LIBS)
AC_SUBST(PTHREAD_FLAGS)
AC_SUBST(PTHREAD_LIBS)

View File

@ -19,6 +19,7 @@
#include <unistd.h>
#include <sys/time.h>
#include <time.h>
#include <math.h>
#ifndef WIN32
#include <sys/resource.h>
#endif
@ -27,6 +28,8 @@
#include <curl/curl.h>
#include "compat.h"
#include "miner.h"
#include "findnonce.h"
#include "ocl.h"
#define PROGRAM_NAME "minerd"
#define DEF_RPC_URL "http://127.0.0.1:8332/"
@ -108,6 +111,7 @@ static const char *algo_names[] = {
bool opt_debug = false;
bool opt_protocol = false;
bool opt_ndevs = false;
bool want_longpoll = true;
bool have_longpoll = false;
bool use_syslog = false;
@ -122,6 +126,7 @@ static enum sha256_algos opt_algo = ALGO_SSE2_64;
#else
static enum sha256_algos opt_algo = ALGO_C;
#endif
static int nDevs;
static int opt_n_threads;
static int num_processors;
static char *rpc_url;
@ -135,7 +140,7 @@ pthread_mutex_t time_lock;
static pthread_mutex_t hash_lock;
static unsigned long total_hashes_done;
static struct timeval total_tv_start;
static int solutions;
static int accepted, rejected;
struct option_help {
@ -175,6 +180,9 @@ static struct option_help options_help[] = {
{ "debug",
"(-D) Enable debug output (default: off)" },
{ "ndevs",
"(-n) Display number of detected GPUs" },
{ "no-longpoll",
"Disable X-Long-Polling support (default: enabled)" },
@ -223,6 +231,7 @@ static struct option options[] = {
{ "config", 1, NULL, 'c' },
{ "debug", 0, NULL, 'D' },
{ "help", 0, NULL, 'h' },
{ "ndevs", 0, NULL, 'n' },
{ "no-longpoll", 0, NULL, 1003 },
{ "pass", 1, NULL, 'p' },
{ "protocol-dump", 0, NULL, 'P' },
@ -237,8 +246,6 @@ static struct option options[] = {
{ "url", 1, NULL, 1001 },
{ "user", 1, NULL, 'u' },
{ "userpass", 1, NULL, 1002 },
{ }
};
struct work {
@ -248,6 +255,12 @@ struct work {
unsigned char target[32];
unsigned char hash[32];
uint32_t output[MAXTHREADS];
uint32_t res_nonce;
uint32_t valid;
uint32_t ready;
dev_blk_ctx blk;
};
static bool jobj_binary(const json_t *obj, const char *key,
@ -335,10 +348,12 @@ static bool submit_upstream_work(CURL *curl, const struct work *work)
res = json_object_get(val, "result");
if (json_is_true(res)) {
solutions++;
accepted++;
applog(LOG_INFO, "PROOF OF WORK RESULT: true (yay!!!)");
} else
} else {
rejected++;
applog(LOG_INFO, "PROOF OF WORK RESULT: false (booooo)");
}
json_decref(val);
@ -493,7 +508,7 @@ static void hashmeter(int thr_id, struct timeval *diff,
khashes = hashes_done / 1000.0;
secs = (double)diff->tv_sec + ((double)diff->tv_usec / 1000000.0);
if (opt_n_threads > 1) {
if (opt_n_threads + nDevs > 1) {
double total_mhashes, total_secs;
/* Totals are updated by all threads so can race without locking */
@ -505,13 +520,17 @@ static void hashmeter(int thr_id, struct timeval *diff,
pthread_mutex_unlock(&hash_lock);
total_secs = (double)total_diff.tv_sec +
((double)total_diff.tv_usec / 1000000.0);
applog(LOG_INFO, "[Total: %.2f Mhash/sec] "
"[thread %d: %lu hashes, %.0f khash/sec] [Solved: %d]",
total_mhashes / total_secs, thr_id, hashes_done,
khashes / secs, solutions);
if (opt_debug)
applog(LOG_DEBUG, "[thread %d: %lu hashes, %.0f khash/sec]",
thr_id, hashes_done);
if (!thr_id)
applog(LOG_INFO, "[%.2f Mhash/sec] [%d Accepted] [%d Rejected]",
total_mhashes / total_secs, accepted, rejected);
} else {
applog(LOG_INFO, "[%lu hashes, %.0f khash/sec] [Solved: %d]",
hashes_done, khashes / secs, solutions);
if (opt_debug)
applog(LOG_DEBUG, "[%lu hashes]", hashes_done);
applog(LOG_INFO, "%.0f khash/sec] [%d Accepted] [%d Rejected]",
khashes / secs, accepted, rejected);
}
}
@ -574,6 +593,15 @@ err_out:
return false;
}
bool submit_nonce(struct thr_info *thr, struct work *work, uint32_t nonce)
{
work->data[64+12+0] = (nonce>>0) & 0xff;
work->data[64+12+1] = (nonce>>8) & 0xff;
work->data[64+12+2] = (nonce>>16) & 0xff;
work->data[64+12+3] = (nonce>>24) & 0xff;
return submit_work(thr, work);
}
static void *miner_thread(void *userdata)
{
struct thr_info *mythr = userdata;
@ -693,11 +721,162 @@ out:
return NULL;
}
enum {
STAT_SLEEP_INTERVAL = 1,
STAT_CTR_INTERVAL = 10000000,
FAILURE_INTERVAL = 30,
};
static int block = 0;
static _clState *clStates[16];
static void *gpuminer_thread(void *userdata)
{
struct thr_info *mythr = userdata;
int thr_id = mythr->id;
int failures = 0;
uint32_t res[MAXTHREADS];
setpriority(PRIO_PROCESS, 0, 19);
drop_policy();
size_t globalThreads[1];
size_t localThreads[1];
cl_int status;
_clState *clState = clStates[thr_id];
status = clSetKernelArg(clState->kernel, 0, sizeof(cl_mem), (void *)&clState->inputBuffer);
if(status != CL_SUCCESS) { printf("Error: Setting kernel argument 1.\n"); return false; }
status = clSetKernelArg(clState->kernel, 1, sizeof(cl_mem), (void *)&clState->outputBuffer);
if(status != CL_SUCCESS) { printf("Error: Setting kernel argument 2.\n"); return false; }
struct work *work;
work = malloc(sizeof(struct work)*2);
work[0].ready = 0;
work[1].ready = 0;
int frame = 0;
int res_frame = 0;
int my_block = block;
bool need_work = true;
unsigned long hashes_done;
hashes_done = 0;
unsigned int h0count = 0;
while (1) {
struct timeval tv_start, tv_end, diff;
int threads;
bool rc;
gettimeofday(&tv_start, NULL);
if (need_work || my_block != block) {
frame++;
frame %= 2;
if (opt_debug)
fprintf(stderr, "getwork\n");
/* obtain new work from internal workio thread */
if (unlikely(!get_work(mythr, work + frame))) {
applog(LOG_ERR, "work retrieval failed, exiting "
"gpu mining thread %d", mythr->id);
goto out;
}
precalc_hash(&work[frame].blk, (uint32_t *)(work[frame].midstate), (uint32_t *)(work[frame].data + 64));
work[frame].blk.nonce = 0;
work[frame].valid = true;
work[frame].ready = 0;
my_block = block;
need_work = false;
}
threads = 102400 * 4;
globalThreads[0] = threads;
localThreads[0] = 128;
status = clEnqueueWriteBuffer(clState->commandQueue, clState->inputBuffer, CL_TRUE, 0,
sizeof(dev_blk_ctx), (void *)&work[frame].blk, 0, NULL, NULL);
if(status != CL_SUCCESS) { printf("Error: clEnqueueWriteBuffer failed.\n"); goto out; }
clFinish(clState->commandQueue);
status = clEnqueueNDRangeKernel(clState->commandQueue, clState->kernel, 1, NULL,
globalThreads, localThreads, 0, NULL, NULL);
if (status != CL_SUCCESS) { printf("Error: Enqueueing kernel onto command queue. (clEnqueueNDRangeKernel)\n"); goto out; }
clFlush(clState->commandQueue);
hashes_done = 1024 * threads;
if (work[res_frame].ready) {
rc = false;
uint32_t bestG = ~0;
uint32_t nonce;
int j;
for(j = 0; j < work[res_frame].ready; j++) {
if(res[j]) {
uint32_t start = (work[res_frame].res_nonce + j)<<10;
uint32_t my_g, my_nonce;
my_g = postcalc_hash(mythr, &work[res_frame].blk, &work[res_frame], start, start + 1026, &my_nonce, &h0count);
rc = true;
}
}
work[res_frame].ready = false;
uint32_t *target = (uint32_t *)(work[res_frame].target + 24);
}
gettimeofday(&tv_end, NULL);
timeval_subtract(&diff, &tv_end, &tv_start);
hashmeter(thr_id, &diff, hashes_done);
/* adjust max_nonce to meet target scan time */
if (diff.tv_usec > 500000)
diff.tv_sec++;
if (diff.tv_sec > 0)
applog(LOG_INFO, "Not reaching opt_scantime by %d", diff.tv_sec);
status = clEnqueueReadBuffer(clState->commandQueue, clState->outputBuffer, CL_TRUE, 0,
sizeof(uint32_t) * threads, res, 0, NULL, NULL);
if (status != CL_SUCCESS) { printf("Error: clEnqueueReadBuffer failed. (clEnqueueReadBuffer)\n"); goto out;}
res_frame = frame;
work[res_frame].ready = threads;
work[res_frame].res_nonce = work[res_frame].blk.nonce;
work[frame].blk.nonce += threads;
if (unlikely(work[frame].blk.nonce > 4000000 - threads))
need_work = true;
failures = 0;
}
out:
tq_freeze(mythr->q);
return NULL;
}
static void restart_threads(void)
{
int i;
for (i = 0; i < opt_n_threads; i++)
for (i = 0; i < opt_n_threads + nDevs; i++)
work_restart[i].restart = 1;
}
@ -948,6 +1127,13 @@ int main (int argc, char *argv[])
{
struct thr_info *thr;
int i;
char name[32];
nDevs = clDevicesNum();
if (opt_ndevs) {
printf("%i\n", nDevs);
return nDevs;
}
rpc_url = strdup(DEF_RPC_URL);
@ -975,16 +1161,16 @@ int main (int argc, char *argv[])
openlog("cpuminer", LOG_PID, LOG_USER);
#endif
work_restart = calloc(opt_n_threads, sizeof(*work_restart));
work_restart = calloc(opt_n_threads + nDevs, sizeof(*work_restart));
if (!work_restart)
return 1;
thr_info = calloc(opt_n_threads + 2, sizeof(*thr));
thr_info = calloc(opt_n_threads + 2 + nDevs, sizeof(*thr));
if (!thr_info)
return 1;
/* init workio thread info */
work_thr_id = opt_n_threads;
work_thr_id = opt_n_threads + nDevs;
thr = &thr_info[work_thr_id];
thr->id = work_thr_id;
thr->q = tq_new();
@ -999,7 +1185,7 @@ int main (int argc, char *argv[])
/* init longpoll thread info */
if (want_longpoll) {
longpoll_thr_id = opt_n_threads + 1;
longpoll_thr_id = opt_n_threads + nDevs + 1;
thr = &thr_info[longpoll_thr_id];
thr->id = longpoll_thr_id;
thr->q = tq_new();
@ -1015,8 +1201,32 @@ int main (int argc, char *argv[])
longpoll_thr_id = -1;
gettimeofday(&total_tv_start, NULL);
/* start gpu mining threads */
for (i = 0; i < nDevs; i++) {
thr = &thr_info[i];
thr->id = i;
thr->q = tq_new();
if (!thr->q)
return 1;
printf("Init GPU %i\n", i);
clStates[i] = initCl(i, name, sizeof(name));
printf("initCl() finished. Found %s\n", name);
if (unlikely(pthread_create(&thr->pth, NULL, gpuminer_thread, thr))) {
applog(LOG_ERR, "thread %d create failed", i);
return 1;
}
sleep(1); /* don't pound RPC server all at once */
}
fprintf(stderr, "%d gpu miner threads started\n", i);
/* start mining threads */
for (i = 0; i < opt_n_threads; i++) {
for (i = nDevs; i < nDevs + opt_n_threads; i++) {
thr = &thr_info[i];
thr->id = i;
@ -1032,7 +1242,7 @@ int main (int argc, char *argv[])
sleep(1); /* don't pound RPC server all at once */
}
applog(LOG_INFO, "%d miner threads started, "
applog(LOG_INFO, "%d cpu miner threads started, "
"using SHA256 '%s' algorithm.",
opt_n_threads,
algo_names[opt_algo]);

197
findnonce.c Normal file
View File

@ -0,0 +1,197 @@
/*
* Copyright 2011 Nils Schneider
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version. See COPYING for more details.
*/
#include <stdio.h>
#include <inttypes.h>
#include "ocl.h"
#include "findnonce.h"
const uint32_t SHA256_K[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
inline uint32_t ByteReverse(uint32_t value)
{
__asm__ ("bswap %0" : "=r" (value) : "0" (value));
return value;
}
#define rotate(x,y) ((x<<y) | (x>>(sizeof(x)*8-y)))
#define rotr(x,y) ((x>>y) | (x<<(sizeof(x)*8-y)))
#define R(a, b, c, d, e, f, g, h, w, k) \
h = h + (rotate(e, 26) ^ rotate(e, 21) ^ rotate(e, 7)) + (g ^ (e & (f ^ g))) + k + w; \
d = d + h; \
h = h + (rotate(a, 30) ^ rotate(a, 19) ^ rotate(a, 10)) + ((a & b) | (c & (a | b)))
void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data) {
cl_uint A, B, C, D, E, F, G, H;
A = state[0];
B = state[1];
C = state[2];
D = state[3];
E = state[4];
F = state[5];
G = state[6];
H = state[7];
R(A, B, C, D, E, F, G, H, data[0], SHA256_K[0]);
R(H, A, B, C, D, E, F, G, data[1], SHA256_K[1]);
R(G, H, A, B, C, D, E, F, data[2], SHA256_K[2]);
blk->cty_a = A;
blk->cty_b = B;
blk->cty_c = C;
blk->cty_d = D;
blk->cty_e = E;
blk->cty_f = F;
blk->cty_g = G;
blk->cty_h = H;
blk->ctx_a = state[0];
blk->ctx_b = state[1];
blk->ctx_c = state[2];
blk->ctx_d = state[3];
blk->ctx_e = state[4];
blk->ctx_f = state[5];
blk->ctx_g = state[6];
blk->ctx_h = state[7];
blk->merkle = data[0];
blk->ntime = data[1];
blk->nbits = data[2];
blk->fW0 = data[0] + (rotr(data[1], 7) ^ rotr(data[1], 18) ^ (data[1] >> 3));
blk->fW1 = data[1] + (rotr(data[2], 7) ^ rotr(data[2], 18) ^ (data[2] >> 3)) + 0x01100000;
blk->fW2 = data[2] + (rotr(blk->fW0, 17) ^ rotr(blk->fW0, 19) ^ (blk->fW0 >> 10));
blk->fW3 = 0x11002000 + (rotr(blk->fW1, 17) ^ rotr(blk->fW1, 19) ^ (blk->fW1 >> 10));
blk->fW15 = 0x00000280 + (rotr(blk->fW0, 7) ^ rotr(blk->fW0, 18) ^ (blk->fW0 >> 3));
blk->fW01r = blk->fW0 + (rotr(blk->fW1, 7) ^ rotr(blk->fW1, 18) ^ (blk->fW1 >> 3));
blk->fcty_e = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + 0xe9b5dba5;
blk->fcty_e2 = (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
}
#define P(t) (W[(t)&0xF] = W[(t-16)&0xF] + (rotate(W[(t-15)&0xF], 25) ^ rotate(W[(t-15)&0xF], 14) ^ (W[(t-15)&0xF] >> 3)) + W[(t-7)&0xF] + (rotate(W[(t-2)&0xF], 15) ^ rotate(W[(t-2)&0xF], 13) ^ (W[(t-2)&0xF] >> 10)))
#define IR(u) \
R(A, B, C, D, E, F, G, H, W[u+0], SHA256_K[u+0]); \
R(H, A, B, C, D, E, F, G, W[u+1], SHA256_K[u+1]); \
R(G, H, A, B, C, D, E, F, W[u+2], SHA256_K[u+2]); \
R(F, G, H, A, B, C, D, E, W[u+3], SHA256_K[u+3]); \
R(E, F, G, H, A, B, C, D, W[u+4], SHA256_K[u+4]); \
R(D, E, F, G, H, A, B, C, W[u+5], SHA256_K[u+5]); \
R(C, D, E, F, G, H, A, B, W[u+6], SHA256_K[u+6]); \
R(B, C, D, E, F, G, H, A, W[u+7], SHA256_K[u+7])
#define FR(u) \
R(A, B, C, D, E, F, G, H, P(u+0), SHA256_K[u+0]); \
R(H, A, B, C, D, E, F, G, P(u+1), SHA256_K[u+1]); \
R(G, H, A, B, C, D, E, F, P(u+2), SHA256_K[u+2]); \
R(F, G, H, A, B, C, D, E, P(u+3), SHA256_K[u+3]); \
R(E, F, G, H, A, B, C, D, P(u+4), SHA256_K[u+4]); \
R(D, E, F, G, H, A, B, C, P(u+5), SHA256_K[u+5]); \
R(C, D, E, F, G, H, A, B, P(u+6), SHA256_K[u+6]); \
R(B, C, D, E, F, G, H, A, P(u+7), SHA256_K[u+7])
#define PIR(u) \
R(F, G, H, A, B, C, D, E, W[u+3], SHA256_K[u+3]); \
R(E, F, G, H, A, B, C, D, W[u+4], SHA256_K[u+4]); \
R(D, E, F, G, H, A, B, C, W[u+5], SHA256_K[u+5]); \
R(C, D, E, F, G, H, A, B, W[u+6], SHA256_K[u+6]); \
R(B, C, D, E, F, G, H, A, W[u+7], SHA256_K[u+7])
#define PFR(u) \
R(A, B, C, D, E, F, G, H, P(u+0), SHA256_K[u+0]); \
R(H, A, B, C, D, E, F, G, P(u+1), SHA256_K[u+1]); \
R(G, H, A, B, C, D, E, F, P(u+2), SHA256_K[u+2]); \
R(F, G, H, A, B, C, D, E, P(u+3), SHA256_K[u+3]); \
R(E, F, G, H, A, B, C, D, P(u+4), SHA256_K[u+4]); \
R(D, E, F, G, H, A, B, C, P(u+5), SHA256_K[u+5])
uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
struct work *work, uint32_t start, uint32_t end,
uint32_t *best_nonce, unsigned int *h0count)
{
cl_uint A, B, C, D, E, F, G, H;
cl_uint W[16];
cl_uint nonce;
cl_uint best_g = ~0;
work_restart[thr->id].restart = 0;
for (nonce = start; nonce != end; nonce+=1) {
A = blk->cty_a; B = blk->cty_b;
C = blk->cty_c; D = blk->cty_d;
E = blk->cty_e; F = blk->cty_f;
G = blk->cty_g; H = blk->cty_h;
W[0] = blk->merkle; W[1] = blk->ntime;
W[2] = blk->nbits; W[3] = nonce;;
W[4] = 0x80000000; W[5] = 0x00000000; W[6] = 0x00000000; W[7] = 0x00000000;
W[8] = 0x00000000; W[9] = 0x00000000; W[10] = 0x00000000; W[11] = 0x00000000;
W[12] = 0x00000000; W[13] = 0x00000000; W[14] = 0x00000000; W[15] = 0x00000280;
PIR(0); IR(8);
FR(16); FR(24);
FR(32); FR(40);
FR(48); FR(56);
W[0] = A + blk->ctx_a; W[1] = B + blk->ctx_b;
W[2] = C + blk->ctx_c; W[3] = D + blk->ctx_d;
W[4] = E + blk->ctx_e; W[5] = F + blk->ctx_f;
W[6] = G + blk->ctx_g; W[7] = H + blk->ctx_h;
W[8] = 0x80000000; W[9] = 0x00000000; W[10] = 0x00000000; W[11] = 0x00000000;
W[12] = 0x00000000; W[13] = 0x00000000; W[14] = 0x00000000; W[15] = 0x00000100;
A = 0x6a09e667; B = 0xbb67ae85;
C = 0x3c6ef372; D = 0xa54ff53a;
E = 0x510e527f; F = 0x9b05688c;
G = 0x1f83d9ab; H = 0x5be0cd19;
IR(0); IR(8);
FR(16); FR(24);
FR(32); FR(40);
FR(48); PFR(56);
if (unlikely(H == 0xA41F32E7)) {
(*h0count)++;
if (unlikely(submit_nonce(thr, work, nonce) == false)) {
applog(LOG_ERR, "Failed to submit work, exiting");
goto out;
}
G += 0x1f83d9ab;
G = ByteReverse(G);
if (G < best_g) {
*best_nonce = nonce;
best_g = G;
}
}
if (work_restart[thr->id].restart)
break;
}
out:
if (best_g == ~0) printf("No best_g found! Error in OpenCL code?\n");
return best_g;
}

23
findnonce.h Normal file
View File

@ -0,0 +1,23 @@
#define MAXTHREADS 2000000
#ifdef __APPLE_CC__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
#include "miner.h"
typedef struct {
cl_uint ctx_a; cl_uint ctx_b; cl_uint ctx_c; cl_uint ctx_d;
cl_uint ctx_e; cl_uint ctx_f; cl_uint ctx_g; cl_uint ctx_h;
cl_uint cty_a; cl_uint cty_b; cl_uint cty_c; cl_uint cty_d;
cl_uint cty_e; cl_uint cty_f; cl_uint cty_g; cl_uint cty_h;
cl_uint merkle; cl_uint ntime; cl_uint nbits; cl_uint nonce;
cl_uint fW0; cl_uint fW1; cl_uint fW2; cl_uint fW3; cl_uint fW15;
cl_uint fW01r; cl_uint fcty_e; cl_uint fcty_e2;
} dev_blk_ctx;
extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data);
extern uint32_t postcalc_hash(struct thr_info *thr, dev_blk_ctx *blk,
struct work *work, uint32_t start, uint32_t end,
uint32_t *best_nonce, unsigned int *h0count);

View File

@ -194,6 +194,8 @@ extern bool use_syslog;
extern struct thr_info *thr_info;
extern int longpoll_thr_id;
extern struct work_restart *work_restart;
struct work;
bool submit_nonce(struct thr_info *thr, struct work *work, uint32_t nonce);
extern void applog(int prio, const char *fmt, ...);
extern struct thread_q *tq_new(void);

271
ocl.c Normal file
View File

@ -0,0 +1,271 @@
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <time.h>
#include <sys/time.h>
#include <pthread.h>
#include "findnonce.h"
#include "ocl.h"
char *file_contents(const char *filename, int *length)
{
FILE *f = fopen(filename, "r");
void *buffer;
if (!f) {
fprintf(stderr, "Unable to open %s for reading\n", filename);
return NULL;
}
fseek(f, 0, SEEK_END);
*length = ftell(f);
fseek(f, 0, SEEK_SET);
buffer = malloc(*length+1);
*length = fread(buffer, 1, *length, f);
fclose(f);
((char*)buffer)[*length] = '\0';
return (char*)buffer;
}
int clDevicesNum() {
cl_int status = 0;
cl_uint numPlatforms;
cl_platform_id platform = NULL;
status = clGetPlatformIDs(0, NULL, &numPlatforms);
if(status != CL_SUCCESS)
{
printf("Error: Getting Platforms. (clGetPlatformsIDs)\n");
return -1;
}
if(numPlatforms > 0)
{
cl_platform_id* platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id));
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
if(status != CL_SUCCESS)
{
printf("Error: Getting Platform Ids. (clGetPlatformsIDs)\n");
return -1;
}
unsigned int i;
for(i=0; i < numPlatforms; ++i)
{
char pbuff[100];
status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
if(status != CL_SUCCESS)
{
printf("Error: Getting Platform Info. (clGetPlatformInfo)\n");
free(platforms);
return -1;
}
platform = platforms[i];
if(!strcmp(pbuff, "Advanced Micro Devices, Inc."))
{
break;
}
}
free(platforms);
}
if(platform == NULL) {
perror("NULL platform found!\n");
return -1;
}
cl_uint numDevices;
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
if(status != CL_SUCCESS)
{
printf("Error: Getting Device IDs (num)\n");
return -1;
}
return numDevices;
}
_clState *initCl(int gpu, char *name, size_t nameSize) {
cl_int status = 0;
_clState *clState = malloc(sizeof(_clState));;
cl_uint numPlatforms;
cl_platform_id platform = NULL;
status = clGetPlatformIDs(0, NULL, &numPlatforms);
if(status != CL_SUCCESS)
{
printf("Error: Getting Platforms. (clGetPlatformsIDs)\n");
return NULL;
}
if(numPlatforms > 0)
{
cl_platform_id* platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id));
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
if(status != CL_SUCCESS)
{
printf("Error: Getting Platform Ids. (clGetPlatformsIDs)\n");
return NULL;
}
unsigned int i;
for(i=0; i < numPlatforms; ++i)
{
char pbuff[100];
status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
if(status != CL_SUCCESS)
{
printf("Error: Getting Platform Info. (clGetPlatformInfo)\n");
free(platforms);
return NULL;
}
platform = platforms[i];
if(!strcmp(pbuff, "Advanced Micro Devices, Inc."))
{
break;
}
}
free(platforms);
}
if(platform == NULL) {
perror("NULL platform found!\n");
return NULL;
}
cl_uint numDevices;
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
if(status != CL_SUCCESS)
{
printf("Error: Getting Device IDs (num)\n");
return NULL;
}
cl_device_id *devices;
if(numDevices > 0 ) {
devices = (cl_device_id *)malloc(numDevices*sizeof(cl_device_id));
/* Now, get the device list data */
status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
if(status != CL_SUCCESS)
{
printf("Error: Getting Device IDs (list)\n");
return NULL;
}
printf("List of devices:\n");
int i;
for(i=0; i<numDevices; i++) {
char pbuff[100];
status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
if(status != CL_SUCCESS)
{
printf("Error: Getting Device Info\n");
return NULL;
}
printf("\t%i\t%s\n", i, pbuff);
}
if (gpu >= 0 && gpu < numDevices) {
char pbuff[100];
status = clGetDeviceInfo(devices[gpu], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
if(status != CL_SUCCESS)
{
printf("Error: Getting Device Info\n");
return NULL;
}
printf("Selected %i: %s\n", gpu, pbuff);
strncpy(name, pbuff, nameSize);
} else {
printf("Invalid GPU %i\n", gpu);
return NULL;
}
} else return NULL;
cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };
clState->context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
if(status != CL_SUCCESS)
{
printf("Error: Creating Context. (clCreateContextFromType)\n");
return NULL;
}
/////////////////////////////////////////////////////////////////
// Load CL file, build CL program object, create CL kernel object
/////////////////////////////////////////////////////////////////
//
const char * filename = "oclminer.cl";
int pl;
char *source = file_contents(filename, &pl);
size_t sourceSize[] = {(size_t)pl};
clState->program = clCreateProgramWithSource(clState->context, 1, (const char **)&source, sourceSize, &status);
if(status != CL_SUCCESS)
{
printf("Error: Loading Binary into cl_program (clCreateProgramWithBinary)\n");
return NULL;
}
/* create a cl program executable for all the devices specified */
status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL);
if(status != CL_SUCCESS)
{
printf("Error: Building Program (clBuildProgram)\n");
size_t logSize;
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
char *log = malloc(logSize);
status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
printf("%s\n", log);
return NULL;
}
/* get a kernel object handle for a kernel with the given name */
clState->kernel = clCreateKernel(clState->program, "oclminer", &status);
if(status != CL_SUCCESS)
{
printf("Error: Creating Kernel from program. (clCreateKernel)\n");
return NULL;
}
/////////////////////////////////////////////////////////////////
// Create an OpenCL command queue
/////////////////////////////////////////////////////////////////
clState->commandQueue = clCreateCommandQueue( clState->context, devices[gpu], 0, &status);
if(status != CL_SUCCESS)
{
printf("Creating Command Queue. (clCreateCommandQueue)\n");
return NULL;
}
clState->inputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, sizeof(dev_blk_ctx), NULL, &status);
if(status != CL_SUCCESS) {
printf("Error: clCreateBuffer (inputBuffer)\n");
return NULL;
}
clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, sizeof(uint32_t) * MAXTHREADS, NULL, &status);
if(status != CL_SUCCESS) {
printf("Error: clCreateBuffer (outputBuffer)\n");
return NULL;
}
return clState;
}

22
ocl.h Normal file
View File

@ -0,0 +1,22 @@
#ifndef __OCL_H__
#define __OCL_H__
#ifdef __APPLE_CC__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
typedef struct {
cl_context context;
cl_kernel kernel;
cl_command_queue commandQueue;
cl_program program;
cl_mem inputBuffer;
cl_mem outputBuffer;
} _clState;
extern char *file_contents(const char *filename, int *length);
extern int clDevicesNum();
extern _clState *initCl(int gpu, char *name, size_t nameSize);
#endif /* __OCL_H__ */

284
oclminer.cl Normal file
View File

@ -0,0 +1,284 @@
#define rotr(x, n) rotate(x, (uint)(32 - n))
#define WGS __attribute__((reqd_work_group_size(128, 1, 1)))
__constant uint K[64] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
typedef struct {
uint ctx_a; uint ctx_b; uint ctx_c; uint ctx_d;
uint ctx_e; uint ctx_f; uint ctx_g; uint ctx_h;
uint cty_a; uint cty_b; uint cty_c; uint cty_d;
uint cty_e; uint cty_f; uint cty_g; uint cty_h;
uint merkle; uint ntime; uint nbits; uint nonce;
uint fW0; uint fW1; uint fW2; uint fW3; uint fW15;
uint fW01r; uint fcty_e; uint fcty_e2;
} dev_blk_ctx;
__kernel __attribute__((vec_type_hint(uint))) WGS void oclminer(
__constant dev_blk_ctx *ctx, __global uint *output)
{
const uint fW0 = ctx->fW0;
const uint fW1 = ctx->fW1;
const uint fW2 = ctx->fW2;
const uint fW3 = ctx->fW3;
const uint fW15 = ctx->fW15;
const uint fW01r = ctx->fW01r;
const uint fcty_e = ctx->fcty_e;
const uint fcty_e2 = ctx->fcty_e2;
const uint state0 = ctx->ctx_a;
const uint state1 = ctx->ctx_b;
const uint state2 = ctx->ctx_c;
const uint state3 = ctx->ctx_d;
const uint state4 = ctx->ctx_e;
const uint state5 = ctx->ctx_f;
const uint state6 = ctx->ctx_g;
const uint state7 = ctx->ctx_h;
const uint B1 = ctx->cty_b;
const uint C1 = ctx->cty_c;
const uint D1 = ctx->cty_d;
const uint F1 = ctx->cty_f;
const uint G1 = ctx->cty_g;
const uint H1 = ctx->cty_h;
uint A, B, C, D, E, F, G, H;
uint W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15;
uint it, res = 0;
const uint myid = get_global_id(0);
const uint tnonce = (ctx->nonce + myid)<<10;
for(it = 0; it != 1024; it++) {
W3 = it ^ tnonce;
E = fcty_e + W3; A = state0 + E; E = E + fcty_e2;
D = D1 + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C1 ^ (A & (B1 ^ C1))) + K[ 4] + 0x80000000; H = H1 + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F1) | (G1 & (E | F1)));
C = C1 + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B1 ^ (H & (A ^ B1))) + K[ 5]; G = G1 + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F1 & (D | E)));
B = B1 + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[ 6]; F = F1 + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[ 7]; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[ 8]; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[ 9]; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[10]; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[11]; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[12]; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[13]; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[14]; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[15] + 0x00000280; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[16] + fW0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[17] + fW1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
W2 = (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3)) + fW2;
F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[18] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
W3 = W3 + fW3;
E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[19] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
W4 = (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10)) + 0x80000000;
D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[20] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
W5 = (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10));
C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[21] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
W6 = (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10)) + 0x00000280;
B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[22] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
W7 = (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10)) + fW0;
A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[23] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
W8 = (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10)) + fW1;
H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[24] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
W9 = W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10));
G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[25] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
W10 = W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10));
F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[26] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
W11 = W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10));
E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[27] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
W12 = W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10));
D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[28] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
W13 = W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10));
C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[29] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
W14 = 0x00a00055 + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10));
B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[30] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
W15 = fW15 + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10));
A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[31] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
W0 = fW01r + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10));
H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[32] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
W1 = fW1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10));
G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[33] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10));
F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[34] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10));
E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[35] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10));
D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[36] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10));
C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[37] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10));
B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[38] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10));
A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[39] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10));
H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[40] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10));
G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[41] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10));
F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[42] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10));
E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[43] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10));
D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[44] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
W13 = W13 + (rotr(W14, 7) ^ rotr(W14, 18) ^ (W14 >> 3)) + W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10));
C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[45] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
W14 = W14 + (rotr(W15, 7) ^ rotr(W15, 18) ^ (W15 >> 3)) + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10));
B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[46] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
W15 = W15 + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10));
A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[47] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3)) + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10));
H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[48] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10));
G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[49] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10));
F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[50] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10));
E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[51] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10));
D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[52] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10));
C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[53] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10));
B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[54] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10));
A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[55] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10));
H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[56] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10));
G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[57] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10));
F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[58] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10));
E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[59] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10));
D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[60] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
W13 = W13 + (rotr(W14, 7) ^ rotr(W14, 18) ^ (W14 >> 3)) + W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10));
C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[61] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
W14 = W14 + (rotr(W15, 7) ^ rotr(W15, 18) ^ (W15 >> 3)) + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10));
B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[62] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
W15 = W15 + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10));
A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[63] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
W0 = A + state0; W1 = B + state1;
W2 = C + state2; W3 = D + state3;
W4 = E + state4; W5 = F + state5;
W6 = G + state6; W7 = H + state7;
H = 0xb0edbdd0 + K[ 0] + W0; D = 0xa54ff53a + H; H = H + 0x08909ae5;
G = 0x1f83d9ab + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (0x9b05688c ^ (D & 0xca0b3af3)) + K[ 1] + W1; C = 0x3c6ef372 + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & 0x6a09e667) | (0xbb67ae85 & (H | 0x6a09e667)));
F = 0x9b05688c + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (0x510e527f ^ (C & (D ^ 0x510e527f))) + K[ 2] + W2; B = 0xbb67ae85 + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (0x6a09e667 & (G | H)));
E = 0x510e527f + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[ 3] + W3; A = 0x6a09e667 + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[ 4] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[ 5] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[ 6] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[ 7] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[ 8] + 0x80000000; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[ 9]; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[10]; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[11]; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[12]; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[13]; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[14]; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[15] + 0x00000100; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3));
H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[16] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3)) + 0x00a00000;
G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[17] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3)) + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10));
F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[18] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3)) + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10));
E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[19] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3)) + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10));
D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[20] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3)) + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10));
C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[21] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3)) + 0x00000100 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10));
B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[22] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
W7 = W7 + 0x11002000 + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10));
A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[23] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
W8 = 0x80000000 + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10));
H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[24] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
W9 = W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10));
G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[25] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
W10 = W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10));
F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[26] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
W11 = W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10));
E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[27] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
W12 = W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10));
D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[28] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
W13 = W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10));
C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[29] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
W14 = 0x00400022 + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10));
B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[30] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
W15 = 0x00000100 + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10));
A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[31] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3)) + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10));
H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[32] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10));
G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[33] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10));
F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[34] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10));
E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[35] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10));
D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[36] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10));
C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[37] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10));
B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[38] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10));
A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[39] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10));
H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[40] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10));
G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[41] + W9; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10));
F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[42] + W10; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10));
E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[43] + W11; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10));
D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[44] + W12; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
W13 = W13 + (rotr(W14, 7) ^ rotr(W14, 18) ^ (W14 >> 3)) + W6 + (rotr(W11, 17) ^ rotr(W11, 19) ^ (W11 >> 10));
C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[45] + W13; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
W14 = W14 + (rotr(W15, 7) ^ rotr(W15, 18) ^ (W15 >> 3)) + W7 + (rotr(W12, 17) ^ rotr(W12, 19) ^ (W12 >> 10));
B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[46] + W14; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
W15 = W15 + (rotr(W0, 7) ^ rotr(W0, 18) ^ (W0 >> 3)) + W8 + (rotr(W13, 17) ^ rotr(W13, 19) ^ (W13 >> 10));
A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[47] + W15; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
W0 = W0 + (rotr(W1, 7) ^ rotr(W1, 18) ^ (W1 >> 3)) + W9 + (rotr(W14, 17) ^ rotr(W14, 19) ^ (W14 >> 10));
H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[48] + W0; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
W1 = W1 + (rotr(W2, 7) ^ rotr(W2, 18) ^ (W2 >> 3)) + W10 + (rotr(W15, 17) ^ rotr(W15, 19) ^ (W15 >> 10));
G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[49] + W1; C = C + G; G = G + (rotr(H, 2) ^ rotr(H, 13) ^ rotr(H, 22)) + ((H & A) | (B & (H | A)));
W2 = W2 + (rotr(W3, 7) ^ rotr(W3, 18) ^ (W3 >> 3)) + W11 + (rotr(W0, 17) ^ rotr(W0, 19) ^ (W0 >> 10));
F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[50] + W2; B = B + F; F = F + (rotr(G, 2) ^ rotr(G, 13) ^ rotr(G, 22)) + ((G & H) | (A & (G | H)));
W3 = W3 + (rotr(W4, 7) ^ rotr(W4, 18) ^ (W4 >> 3)) + W12 + (rotr(W1, 17) ^ rotr(W1, 19) ^ (W1 >> 10));
E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[51] + W3; A = A + E; E = E + (rotr(F, 2) ^ rotr(F, 13) ^ rotr(F, 22)) + ((F & G) | (H & (F | G)));
W4 = W4 + (rotr(W5, 7) ^ rotr(W5, 18) ^ (W5 >> 3)) + W13 + (rotr(W2, 17) ^ rotr(W2, 19) ^ (W2 >> 10));
D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[52] + W4; H = H + D; D = D + (rotr(E, 2) ^ rotr(E, 13) ^ rotr(E, 22)) + ((E & F) | (G & (E | F)));
W5 = W5 + (rotr(W6, 7) ^ rotr(W6, 18) ^ (W6 >> 3)) + W14 + (rotr(W3, 17) ^ rotr(W3, 19) ^ (W3 >> 10));
C = C + (rotr(H, 6) ^ rotr(H, 11) ^ rotr(H, 25)) + (B ^ (H & (A ^ B))) + K[53] + W5; G = G + C; C = C + (rotr(D, 2) ^ rotr(D, 13) ^ rotr(D, 22)) + ((D & E) | (F & (D | E)));
W6 = W6 + (rotr(W7, 7) ^ rotr(W7, 18) ^ (W7 >> 3)) + W15 + (rotr(W4, 17) ^ rotr(W4, 19) ^ (W4 >> 10));
B = B + (rotr(G, 6) ^ rotr(G, 11) ^ rotr(G, 25)) + (A ^ (G & (H ^ A))) + K[54] + W6; F = F + B; B = B + (rotr(C, 2) ^ rotr(C, 13) ^ rotr(C, 22)) + ((C & D) | (E & (C | D)));
W7 = W7 + (rotr(W8, 7) ^ rotr(W8, 18) ^ (W8 >> 3)) + W0 + (rotr(W5, 17) ^ rotr(W5, 19) ^ (W5 >> 10));
A = A + (rotr(F, 6) ^ rotr(F, 11) ^ rotr(F, 25)) + (H ^ (F & (G ^ H))) + K[55] + W7; E = E + A; A = A + (rotr(B, 2) ^ rotr(B, 13) ^ rotr(B, 22)) + ((B & C) | (D & (B | C)));
W8 = W8 + (rotr(W9, 7) ^ rotr(W9, 18) ^ (W9 >> 3)) + W1 + (rotr(W6, 17) ^ rotr(W6, 19) ^ (W6 >> 10));
H = H + (rotr(E, 6) ^ rotr(E, 11) ^ rotr(E, 25)) + (G ^ (E & (F ^ G))) + K[56] + W8; D = D + H; H = H + (rotr(A, 2) ^ rotr(A, 13) ^ rotr(A, 22)) + ((A & B) | (C & (A | B)));
W9 = W9 + (rotr(W10, 7) ^ rotr(W10, 18) ^ (W10 >> 3)) + W2 + (rotr(W7, 17) ^ rotr(W7, 19) ^ (W7 >> 10));
G = G + (rotr(D, 6) ^ rotr(D, 11) ^ rotr(D, 25)) + (F ^ (D & (E ^ F))) + K[57] + W9; C = C + G;
W10 = W10 + (rotr(W11, 7) ^ rotr(W11, 18) ^ (W11 >> 3)) + W3 + (rotr(W8, 17) ^ rotr(W8, 19) ^ (W8 >> 10));
F = F + (rotr(C, 6) ^ rotr(C, 11) ^ rotr(C, 25)) + (E ^ (C & (D ^ E))) + K[58] + W10; B = B + F;
W11 = W11 + (rotr(W12, 7) ^ rotr(W12, 18) ^ (W12 >> 3)) + W4 + (rotr(W9, 17) ^ rotr(W9, 19) ^ (W9 >> 10));
E = E + (rotr(B, 6) ^ rotr(B, 11) ^ rotr(B, 25)) + (D ^ (B & (C ^ D))) + K[59] + W11; A = A + E;
W12 = W12 + (rotr(W13, 7) ^ rotr(W13, 18) ^ (W13 >> 3)) + W5 + (rotr(W10, 17) ^ rotr(W10, 19) ^ (W10 >> 10));
D = D + (rotr(A, 6) ^ rotr(A, 11) ^ rotr(A, 25)) + (C ^ (A & (B ^ C))) + K[60] + W12; H = H + D;
res |= (H==0xa41f32e7);
}
output[myid] = res;
}