From a17ec112f60505eaac9d6cbc91b06c750ee978cd Mon Sep 17 00:00:00 2001 From: phm Date: Fri, 21 Feb 2014 15:45:29 +0100 Subject: [PATCH 1/2] Added support for Quark mining. --- Makefile.am | 1 + configure.ac | 1 + driver-opencl.c | 10 +- kernel/quarkcoin.cl | 632 ++++++++++++++++++++++++++++++++++++++++++++ miner.h | 1 + ocl.c | 5 + quarkcoin.c | 225 ++++++++++++++++ quarkcoin.h | 10 + sgminer.c | 7 +- 9 files changed, 889 insertions(+), 3 deletions(-) create mode 100644 kernel/quarkcoin.cl create mode 100644 quarkcoin.c create mode 100644 quarkcoin.h diff --git a/Makefile.am b/Makefile.am index dda3b399..b1b0c272 100644 --- a/Makefile.am +++ b/Makefile.am @@ -43,6 +43,7 @@ sgminer_SOURCES += findnonce.c findnonce.h sgminer_SOURCES += adl.c adl.h adl_functions.h sgminer_SOURCES += scrypt.c scrypt.h sgminer_SOURCES += darkcoin.c darkcoin.h +sgminer_SOURCES += quarkcoin.c quarkcoin.h sgminer_SOURCES += kernel/*.cl bin_SCRIPTS = $(top_srcdir)/kernel/*.cl diff --git a/configure.ac b/configure.ac index a7d5656e..8c0e8d67 100644 --- a/configure.ac +++ b/configure.ac @@ -346,6 +346,7 @@ AC_DEFINE_UNQUOTED([CKOLIVAS_KERNNAME], ["ckolivas"], [Filename for original scr AC_DEFINE_UNQUOTED([ZUIKKIS_KERNNAME], ["zuikkis"], [Filename for Zuikkis' optimised kernel]) AC_DEFINE_UNQUOTED([PSW_KERNNAME], ["psw"], [Filename for psw's experimental kernel]) AC_DEFINE_UNQUOTED([DARKCOIN_KERNNAME], ["darkcoin"], [Filename for DarkCoin optimised kernel]) +AC_DEFINE_UNQUOTED([QUARKCOIN_KERNNAME], ["quarkcoin"], [Filename for QuarkCoin optimised kernel]) AC_SUBST(OPENCL_LIBS) AC_SUBST(OPENCL_FLAGS) diff --git a/driver-opencl.c b/driver-opencl.c index 6680686e..a2333c3a 100644 --- a/driver-opencl.c +++ b/driver-opencl.c @@ -209,6 +209,8 @@ static enum cl_kernels select_kernel(char *arg) return KL_PSW; if (!strcmp(arg, DARKCOIN_KERNNAME)) return KL_DARKCOIN; + if (!strcmp(arg, QUARKCOIN_KERNNAME)) + return KL_QUARKCOIN; return KL_NONE; } @@ -226,8 +228,8 @@ char *set_kernel(char *arg) if (kern == KL_NONE) return "Invalid parameter to set_kernel"; gpus[device++].kernel = kern; - if (kern >= KL_DARKCOIN) - is_scrypt = false; +// if (kern >= KL_DARKCOIN) +// is_scrypt = false; while ((nextptr = strtok(NULL, ",")) != NULL) { kern = select_kernel(nextptr); @@ -1348,6 +1350,9 @@ static bool opencl_thread_prepare(struct thr_info *thr) case KL_DARKCOIN: cgpu->kname = DARKCOIN_KERNNAME; break; + case KL_QUARKCOIN: + cgpu->kname = QUARKCOIN_KERNNAME; + break; default: break; } @@ -1384,6 +1389,7 @@ static bool opencl_thread_init(struct thr_info *thr) thrdata->queue_kernel_parameters = &queue_scrypt_kernel; break; case KL_DARKCOIN: + case KL_QUARKCOIN: thrdata->queue_kernel_parameters = &queue_sph_kernel; break; default: diff --git a/kernel/quarkcoin.cl b/kernel/quarkcoin.cl new file mode 100644 index 00000000..eba855ae --- /dev/null +++ b/kernel/quarkcoin.cl @@ -0,0 +1,632 @@ +/* + * QuarkCoin kernel implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2014 phm + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author phm + */ + +#ifndef QUARKCOIN_CL +#define QUARKCOIN_CL + +#if __ENDIAN_LITTLE__ +#define SPH_LITTLE_ENDIAN 1 +#else +#define SPH_BIG_ENDIAN 1 +#endif + +#define SPH_UPTR sph_u64 + +typedef unsigned int sph_u32; +typedef int sph_s32; +#ifndef __OPENCL_VERSION__ +typedef unsigned long long sph_u64; +typedef long long sph_s64; +#else +typedef unsigned long sph_u64; +typedef long sph_s64; +#endif + +#define SPH_64 1 +#define SPH_64_TRUE 1 + +#define SPH_C32(x) ((sph_u32)(x ## U)) +#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) +#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) +#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) + +#define SPH_C64(x) ((sph_u64)(x ## UL)) +#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) +#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n)))) +#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) + +#define SPH_ECHO_64 1 +#define SPH_KECCAK_64 1 +#define SPH_JH_64 1 +#define SPH_SIMD_NOCOPY 0 +#define SPH_KECCAK_NOCOPY 0 +#define SPH_COMPACT_BLAKE_64 0 +#define SPH_LUFFA_PARALLEL 0 +#define SPH_SMALL_FOOTPRINT_GROESTL 0 +#define SPH_GROESTL_BIG_ENDIAN 0 + +#define SPH_CUBEHASH_UNROLL 0 +#define SPH_KECCAK_UNROLL 0 + +#include "blake.cl" +#include "bmw.cl" +#include "groestl.cl" +#include "jh.cl" +#include "keccak.cl" +#include "skein.cl" + +#define SWAP4(x) as_uint(as_uchar4(x).wzyx) +#define SWAP8(x) as_ulong(as_uchar8(x).s76543210) + +#if SPH_BIG_ENDIAN + #define DEC64E(x) (x) + #define DEC64BE(x) (*(const __global sph_u64 *) (x)); +#else + #define DEC64E(x) SWAP8(x) + #define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x)); +#endif + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search(__global unsigned char* block, volatile __global uint* output, const ulong target) +{ + uint gid = get_global_id(0); + union { + unsigned char h1[64]; + uint h4[16]; + ulong h8[8]; + } hash; + + // blake +{ + sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); + sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); + sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); + sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); + sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; + sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; + + if ((T0 = SPH_T64(T0 + 1024)) < 1024) + { + T1 = SPH_T64(T1 + 1); + } + sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; + sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; + sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; + sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; + M0 = DEC64BE(block + 0); + M1 = DEC64BE(block + 8); + M2 = DEC64BE(block + 16); + M3 = DEC64BE(block + 24); + M4 = DEC64BE(block + 32); + M5 = DEC64BE(block + 40); + M6 = DEC64BE(block + 48); + M7 = DEC64BE(block + 56); + M8 = DEC64BE(block + 64); + M9 = DEC64BE(block + 72); + M9 &= 0xFFFFFFFF00000000; + M9 ^= SWAP4(gid); + MA = 0x8000000000000000; + MB = 0; + MC = 0; + MD = 1; + ME = 0; + MF = 0x280; + + COMPRESS64; + + hash.h8[0] = H0; + hash.h8[1] = H1; + hash.h8[2] = H2; + hash.h8[3] = H3; + hash.h8[4] = H4; + hash.h8[5] = H5; + hash.h8[6] = H6; + hash.h8[7] = H7; +} + // bmw + sph_u64 BMW_H[16]; + for(unsigned u = 0; u < 16; u++) + BMW_H[u] = BMW_IV512[u]; + + sph_u64 BMW_h1[16], BMW_h2[16]; + sph_u64 mv[16]; + + mv[ 0] = SWAP8(hash.h8[0]); + mv[ 1] = SWAP8(hash.h8[1]); + mv[ 2] = SWAP8(hash.h8[2]); + mv[ 3] = SWAP8(hash.h8[3]); + mv[ 4] = SWAP8(hash.h8[4]); + mv[ 5] = SWAP8(hash.h8[5]); + mv[ 6] = SWAP8(hash.h8[6]); + mv[ 7] = SWAP8(hash.h8[7]); + mv[ 8] = 0x80; + mv[ 9] = 0; + mv[10] = 0; + mv[11] = 0; + mv[12] = 0; + mv[13] = 0; + mv[14] = 0; + mv[15] = 0x200; +#define M(x) (mv[x]) +#define H(x) (BMW_H[x]) +#define dH(x) (BMW_h2[x]) + + FOLDb; + +#undef M +#undef H +#undef dH + +#define M(x) (BMW_h2[x]) +#define H(x) (final_b[x]) +#define dH(x) (BMW_h1[x]) + + FOLDb; + +#undef M +#undef H +#undef dH + + hash.h8[0] = SWAP8(BMW_h1[8]); + hash.h8[1] = SWAP8(BMW_h1[9]); + hash.h8[2] = SWAP8(BMW_h1[10]); + hash.h8[3] = SWAP8(BMW_h1[11]); + hash.h8[4] = SWAP8(BMW_h1[12]); + hash.h8[5] = SWAP8(BMW_h1[13]); + hash.h8[6] = SWAP8(BMW_h1[14]); + hash.h8[7] = SWAP8(BMW_h1[15]); + + if((hash.h1[7] & 0x8) != 0) { + + // groestl + + sph_u64 H[16]; + for (unsigned int u = 0; u < 15; u ++) + H[u] = 0; + #if USE_LE + H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); + #else + H[15] = (sph_u64)512; + #endif + + sph_u64 g[16], m[16]; + m[0] = DEC64E(hash.h8[0]); + m[1] = DEC64E(hash.h8[1]); + m[2] = DEC64E(hash.h8[2]); + m[3] = DEC64E(hash.h8[3]); + m[4] = DEC64E(hash.h8[4]); + m[5] = DEC64E(hash.h8[5]); + m[6] = DEC64E(hash.h8[6]); + m[7] = DEC64E(hash.h8[7]); + for (unsigned int u = 0; u < 16; u ++) + g[u] = m[u] ^ H[u]; + m[8] = 0x80; g[8] = m[8] ^ H[8]; + m[9] = 0; g[9] = m[9] ^ H[9]; + m[10] = 0; g[10] = m[10] ^ H[10]; + m[11] = 0; g[11] = m[11] ^ H[11]; + m[12] = 0; g[12] = m[12] ^ H[12]; + m[13] = 0; g[13] = m[13] ^ H[13]; + m[14] = 0; g[14] = m[14] ^ H[14]; + m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + PERM_BIG_P(g); + PERM_BIG_Q(m); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= g[u] ^ m[u]; + sph_u64 xH[16]; + for (unsigned int u = 0; u < 16; u ++) + xH[u] = H[u]; + PERM_BIG_P(xH); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= xH[u]; + for (unsigned int u = 0; u < 8; u ++) + hash.h8[u] = DEC64E(H[u + 8]); + + } else { + + // skein + + sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); + sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u64 bcount = 0; + + m0 = SWAP8(hash.h8[0]); + m1 = SWAP8(hash.h8[1]); + m2 = SWAP8(hash.h8[2]); + m3 = SWAP8(hash.h8[3]); + m4 = SWAP8(hash.h8[4]); + m5 = SWAP8(hash.h8[5]); + m6 = SWAP8(hash.h8[6]); + m7 = SWAP8(hash.h8[7]); + UBI_BIG(480, 64); + bcount = 0; + m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; + UBI_BIG(510, 8); + hash.h8[0] = SWAP8(h0); + hash.h8[1] = SWAP8(h1); + hash.h8[2] = SWAP8(h2); + hash.h8[3] = SWAP8(h3); + hash.h8[4] = SWAP8(h4); + hash.h8[5] = SWAP8(h5); + hash.h8[6] = SWAP8(h6); + hash.h8[7] = SWAP8(h7); + + } + + // groestl + + sph_u64 H[16]; + for (unsigned int u = 0; u < 15; u ++) + H[u] = 0; +#if USE_LE + H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); +#else + H[15] = (sph_u64)512; +#endif + + sph_u64 g[16], m[16]; + m[0] = DEC64E(hash.h8[0]); + m[1] = DEC64E(hash.h8[1]); + m[2] = DEC64E(hash.h8[2]); + m[3] = DEC64E(hash.h8[3]); + m[4] = DEC64E(hash.h8[4]); + m[5] = DEC64E(hash.h8[5]); + m[6] = DEC64E(hash.h8[6]); + m[7] = DEC64E(hash.h8[7]); + for (unsigned int u = 0; u < 16; u ++) + g[u] = m[u] ^ H[u]; + m[8] = 0x80; g[8] = m[8] ^ H[8]; + m[9] = 0; g[9] = m[9] ^ H[9]; + m[10] = 0; g[10] = m[10] ^ H[10]; + m[11] = 0; g[11] = m[11] ^ H[11]; + m[12] = 0; g[12] = m[12] ^ H[12]; + m[13] = 0; g[13] = m[13] ^ H[13]; + m[14] = 0; g[14] = m[14] ^ H[14]; + m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + PERM_BIG_P(g); + PERM_BIG_Q(m); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= g[u] ^ m[u]; + sph_u64 xH[16]; + for (unsigned int u = 0; u < 16; u ++) + xH[u] = H[u]; + PERM_BIG_P(xH); + for (unsigned int u = 0; u < 16; u ++) + H[u] ^= xH[u]; + for (unsigned int u = 0; u < 8; u ++) + hash.h8[u] = DEC64E(H[u + 8]); + + // jh + + sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); + sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); + sph_u64 tmp; + + for(int i = 0; i < 2; i++) + { + if (i == 0) { + h0h ^= DEC64E(hash.h8[0]); + h0l ^= DEC64E(hash.h8[1]); + h1h ^= DEC64E(hash.h8[2]); + h1l ^= DEC64E(hash.h8[3]); + h2h ^= DEC64E(hash.h8[4]); + h2l ^= DEC64E(hash.h8[5]); + h3h ^= DEC64E(hash.h8[6]); + h3l ^= DEC64E(hash.h8[7]); + } else if(i == 1) { + h4h ^= DEC64E(hash.h8[0]); + h4l ^= DEC64E(hash.h8[1]); + h5h ^= DEC64E(hash.h8[2]); + h5l ^= DEC64E(hash.h8[3]); + h6h ^= DEC64E(hash.h8[4]); + h6l ^= DEC64E(hash.h8[5]); + h7h ^= DEC64E(hash.h8[6]); + h7l ^= DEC64E(hash.h8[7]); + + h0h ^= 0x80; + h3l ^= 0x2000000000000; + } + E8; + } + h4h ^= 0x80; + h7l ^= 0x2000000000000; + + hash.h8[0] = DEC64E(h4h); + hash.h8[1] = DEC64E(h4l); + hash.h8[2] = DEC64E(h5h); + hash.h8[3] = DEC64E(h5l); + hash.h8[4] = DEC64E(h6h); + hash.h8[5] = DEC64E(h6l); + hash.h8[6] = DEC64E(h7h); + hash.h8[7] = DEC64E(h7l); + + if((hash.h1[7] & 0x8) != 0) { + + // blake + + sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); + sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); + sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); + sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); + sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; + sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (64 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; + + if ((T0 = SPH_T64(T0 + 1024)) < 1024) + { + T1 = SPH_T64(T1 + 1); + } + sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; + sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; + sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; + sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; + M0 = hash.h8[0]; + M1 = hash.h8[1]; + M2 = hash.h8[2]; + M3 = hash.h8[3]; + M4 = hash.h8[4]; + M5 = hash.h8[5]; + M6 = hash.h8[6]; + M7 = hash.h8[7]; + M8 = 0x8000000000000000; + M9 = 0; + MA = 0; + MB = 0; + MC = 0; + MD = 1; + ME = 0; + MF = 0x200; + + COMPRESS64; + + hash.h8[0] = H0; + hash.h8[1] = H1; + hash.h8[2] = H2; + hash.h8[3] = H3; + hash.h8[4] = H4; + hash.h8[5] = H5; + hash.h8[6] = H6; + hash.h8[7] = H7; + + } else { + + // bmw + sph_u64 BMW_H[16]; + for(unsigned u = 0; u < 16; u++) + BMW_H[u] = BMW_IV512[u]; + + sph_u64 BMW_h1[16], BMW_h2[16]; + sph_u64 mv[16]; + + mv[ 0] = SWAP8(hash.h8[0]); + mv[ 1] = SWAP8(hash.h8[1]); + mv[ 2] = SWAP8(hash.h8[2]); + mv[ 3] = SWAP8(hash.h8[3]); + mv[ 4] = SWAP8(hash.h8[4]); + mv[ 5] = SWAP8(hash.h8[5]); + mv[ 6] = SWAP8(hash.h8[6]); + mv[ 7] = SWAP8(hash.h8[7]); + mv[ 8] = 0x80; + mv[ 9] = 0; + mv[10] = 0; + mv[11] = 0; + mv[12] = 0; + mv[13] = 0; + mv[14] = 0; + mv[15] = 0x200; + #define M(x) (mv[x]) + #define H(x) (BMW_H[x]) + #define dH(x) (BMW_h2[x]) + + FOLDb; + + #undef M + #undef H + #undef dH + + #define M(x) (BMW_h2[x]) + #define H(x) (final_b[x]) + #define dH(x) (BMW_h1[x]) + + FOLDb; + + #undef M + #undef H + #undef dH + + hash.h8[0] = SWAP8(BMW_h1[8]); + hash.h8[1] = SWAP8(BMW_h1[9]); + hash.h8[2] = SWAP8(BMW_h1[10]); + hash.h8[3] = SWAP8(BMW_h1[11]); + hash.h8[4] = SWAP8(BMW_h1[12]); + hash.h8[5] = SWAP8(BMW_h1[13]); + hash.h8[6] = SWAP8(BMW_h1[14]); + hash.h8[7] = SWAP8(BMW_h1[15]); + + } + + // keccak + + sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; + sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; + sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; + sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; + sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; + + a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); + + a00 ^= SWAP8(hash.h8[0]); + a10 ^= SWAP8(hash.h8[1]); + a20 ^= SWAP8(hash.h8[2]); + a30 ^= SWAP8(hash.h8[3]); + a40 ^= SWAP8(hash.h8[4]); + a01 ^= SWAP8(hash.h8[5]); + a11 ^= SWAP8(hash.h8[6]); + a21 ^= SWAP8(hash.h8[7]); + a31 ^= 0x8000000000000001; + KECCAK_F_1600; + // Finalize the "lane complement" + a10 = ~a10; + a20 = ~a20; + + hash.h8[0] = SWAP8(a00); + hash.h8[1] = SWAP8(a10); + hash.h8[2] = SWAP8(a20); + hash.h8[3] = SWAP8(a30); + hash.h8[4] = SWAP8(a40); + hash.h8[5] = SWAP8(a01); + hash.h8[6] = SWAP8(a11); + hash.h8[7] = SWAP8(a21); + + // skein + + sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); + sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u64 bcount = 0; + + m0 = SWAP8(hash.h8[0]); + m1 = SWAP8(hash.h8[1]); + m2 = SWAP8(hash.h8[2]); + m3 = SWAP8(hash.h8[3]); + m4 = SWAP8(hash.h8[4]); + m5 = SWAP8(hash.h8[5]); + m6 = SWAP8(hash.h8[6]); + m7 = SWAP8(hash.h8[7]); + UBI_BIG(480, 64); + bcount = 0; + m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; + UBI_BIG(510, 8); + hash.h8[0] = SWAP8(h0); + hash.h8[1] = SWAP8(h1); + hash.h8[2] = SWAP8(h2); + hash.h8[3] = SWAP8(h3); + hash.h8[4] = SWAP8(h4); + hash.h8[5] = SWAP8(h5); + hash.h8[6] = SWAP8(h6); + hash.h8[7] = SWAP8(h7); + + if((hash.h1[7] & 0x8) != 0) { + + // keccak + + sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; + sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; + sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; + sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; + sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; + + a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); + + a00 ^= SWAP8(hash.h8[0]); + a10 ^= SWAP8(hash.h8[1]); + a20 ^= SWAP8(hash.h8[2]); + a30 ^= SWAP8(hash.h8[3]); + a40 ^= SWAP8(hash.h8[4]); + a01 ^= SWAP8(hash.h8[5]); + a11 ^= SWAP8(hash.h8[6]); + a21 ^= SWAP8(hash.h8[7]); + a31 ^= 0x8000000000000001; + KECCAK_F_1600; + // Finalize the "lane complement" + a10 = ~a10; + a20 = ~a20; + + hash.h8[0] = SWAP8(a00); + hash.h8[1] = SWAP8(a10); + hash.h8[2] = SWAP8(a20); + hash.h8[3] = SWAP8(a30); + hash.h8[4] = SWAP8(a40); + hash.h8[5] = SWAP8(a01); + hash.h8[6] = SWAP8(a11); + hash.h8[7] = SWAP8(a21); + + } else { + + // jh + + sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); + sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); + sph_u64 tmp; + + for(int i = 0; i < 2; i++) + { + if (i == 0) { + h0h ^= DEC64E(hash.h8[0]); + h0l ^= DEC64E(hash.h8[1]); + h1h ^= DEC64E(hash.h8[2]); + h1l ^= DEC64E(hash.h8[3]); + h2h ^= DEC64E(hash.h8[4]); + h2l ^= DEC64E(hash.h8[5]); + h3h ^= DEC64E(hash.h8[6]); + h3l ^= DEC64E(hash.h8[7]); + } else if(i == 1) { + h4h ^= DEC64E(hash.h8[0]); + h4l ^= DEC64E(hash.h8[1]); + h5h ^= DEC64E(hash.h8[2]); + h5l ^= DEC64E(hash.h8[3]); + h6h ^= DEC64E(hash.h8[4]); + h6l ^= DEC64E(hash.h8[5]); + h7h ^= DEC64E(hash.h8[6]); + h7l ^= DEC64E(hash.h8[7]); + + h0h ^= 0x80; + h3l ^= 0x2000000000000; + } + E8; + } + h4h ^= 0x80; + h7l ^= 0x2000000000000; + + hash.h8[0] = DEC64E(h4h); + hash.h8[1] = DEC64E(h4l); + hash.h8[2] = DEC64E(h5h); + hash.h8[3] = DEC64E(h5l); + hash.h8[4] = DEC64E(h6h); + hash.h8[5] = DEC64E(h6l); + hash.h8[6] = DEC64E(h7h); + hash.h8[7] = DEC64E(h7l); + + } + + bool result = (SWAP8(hash.h8[3]) <= target); + if (result) + output[output[0xFF]++] = SWAP4(gid); +} + +#endif // QUARKCOIN_CL diff --git a/miner.h b/miner.h index f9360d16..73e717c5 100644 --- a/miner.h +++ b/miner.h @@ -382,6 +382,7 @@ enum cl_kernels { KL_PSW, KL_ZUIKKIS, KL_DARKCOIN, + KL_QUARKCOIN, }; enum dev_reason { diff --git a/ocl.c b/ocl.c index c05dd68e..38d76d59 100644 --- a/ocl.c +++ b/ocl.c @@ -459,6 +459,11 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize) strcpy(filename, DARKCOIN_KERNNAME".cl"); strcpy(binaryfilename, DARKCOIN_KERNNAME); break; + case KL_QUARKCOIN: + applog(LOG_WARNING, "Kernel quarkcoin is experimental."); + strcpy(filename, QUARKCOIN_KERNNAME".cl"); + strcpy(binaryfilename, QUARKCOIN_KERNNAME); + break; case KL_NONE: /* Shouldn't happen */ break; } diff --git a/quarkcoin.c b/quarkcoin.c new file mode 100644 index 00000000..68590dfc --- /dev/null +++ b/quarkcoin.c @@ -0,0 +1,225 @@ +/*- + * Copyright 2014 phm + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "config.h" +#include "miner.h" + +#include +#include +#include + +#include "sph/sph_blake.h" +#include "sph/sph_bmw.h" +#include "sph/sph_groestl.h" +#include "sph/sph_skein.h" +#include "sph/sph_jh.h" +#include "sph/sph_keccak.h" + +/* + * Encode a length len/4 vector of (uint32_t) into a length len vector of + * (unsigned char) in big-endian form. Assumes len is a multiple of 4. + */ +static inline void +be32enc_vect(uint32_t *dst, const uint32_t *src, uint32_t len) +{ + uint32_t i; + + for (i = 0; i < len; i++) + dst[i] = htobe32(src[i]); +} + + +inline void quarkhash(void *state, const void *input) +{ + sph_blake512_context ctx_blake; + sph_bmw512_context ctx_bmw; + sph_groestl512_context ctx_groestl; + sph_jh512_context ctx_jh; + sph_keccak512_context ctx_keccak; + sph_skein512_context ctx_skein; + + unsigned char hash[64]; + + sph_blake512_init(&ctx_blake); + // ZBLAKE; + sph_blake512 (&ctx_blake, input, 80); + sph_blake512_close(&ctx_blake, (void*) hash); + + sph_bmw512_init(&ctx_bmw); + // ZBMW; + sph_bmw512 (&ctx_bmw, (const void*) hash, 64); + sph_bmw512_close(&ctx_bmw, (void*) hash); + + if (hash[0] & 0x8) + { + sph_groestl512_init(&ctx_groestl); + // ZGROESTL; + sph_groestl512 (&ctx_groestl, (const void*) hash, 64); + sph_groestl512_close(&ctx_groestl, (void*) hash); + } + else + { + sph_skein512_init(&ctx_skein); + // ZSKEIN; + sph_skein512 (&ctx_skein, (const void*) hash, 64); + sph_skein512_close(&ctx_skein, (void*) hash); + } + + sph_groestl512_init(&ctx_groestl); + // ZGROESTL; + sph_groestl512 (&ctx_groestl, (const void*) hash, 64); + sph_groestl512_close(&ctx_groestl, (void*) hash); + + sph_jh512_init(&ctx_jh); + // ZJH; + sph_jh512 (&ctx_jh, (const void*) hash, 64); + sph_jh512_close(&ctx_jh, (void*) hash); + + if (hash[0] & 0x8) + { + sph_blake512_init(&ctx_blake); + // ZBLAKE; + sph_blake512 (&ctx_blake, (const void*) hash, 64); + sph_blake512_close(&ctx_blake, (void*) hash); + } + else + { + sph_bmw512_init(&ctx_bmw); + // ZBMW; + sph_bmw512 (&ctx_bmw, (const void*) hash, 64); + sph_bmw512_close(&ctx_bmw, (void*) hash); + } + + sph_keccak512_init(&ctx_keccak); + // ZKECCAK; + sph_keccak512 (&ctx_keccak, (const void*) hash, 64); + sph_keccak512_close(&ctx_keccak, (void*) hash); + + sph_skein512_init(&ctx_skein); + // SKEIN; + sph_skein512 (&ctx_skein, (const void*) hash, 64); + sph_skein512_close(&ctx_skein, (void*) hash); + + if (hash[0] & 0x8) + { + sph_keccak512_init(&ctx_keccak); + // ZKECCAK; + sph_keccak512 (&ctx_keccak, (const void*) hash, 64); + sph_keccak512_close(&ctx_keccak, (void*) hash); + } + else + { + sph_jh512_init(&ctx_jh); + // ZJH; + sph_jh512 (&ctx_jh, (const void*) hash, 64); + sph_jh512_close(&ctx_jh, (void*) hash); + } + + memcpy(state, hash, 32); +} + +static const uint32_t diff1targ = 0x0000ffff; + + +/* Used externally as confirmation of correct OCL code */ +int quarkcoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce) +{ + uint32_t tmp_hash7, Htarg = le32toh(((const uint32_t *)ptarget)[7]); + uint32_t data[20], ohash[8]; + //char *scratchbuf; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + data[19] = htobe32(nonce); + //scratchbuf = alloca(SCRATCHBUF_SIZE); + quarkhash(ohash, data); + tmp_hash7 = be32toh(ohash[7]); + + applog(LOG_DEBUG, "htarget %08lx diff1 %08lx hash %08lx", + (long unsigned int)Htarg, + (long unsigned int)diff1targ, + (long unsigned int)tmp_hash7); + if (tmp_hash7 > diff1targ) + return -1; + if (tmp_hash7 > Htarg) + return 0; + return 1; +} + +void quarkcoin_regenhash(struct work *work) +{ + uint32_t data[20]; + char *scratchbuf; + uint32_t *nonce = (uint32_t *)(work->data + 76); + uint32_t *ohash = (uint32_t *)(work->hash); + + be32enc_vect(data, (const uint32_t *)work->data, 19); + data[19] = htobe32(*nonce); + quarkhash(ohash, data); +} + +bool scanhash_quarkcoin(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, + unsigned char *pdata, unsigned char __maybe_unused *phash1, + unsigned char __maybe_unused *phash, const unsigned char *ptarget, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) +{ + uint32_t *nonce = (uint32_t *)(pdata + 76); + char *scratchbuf; + uint32_t data[20]; + uint32_t tmp_hash7; + uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]); + bool ret = false; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + + while(1) { + uint32_t ostate[8]; + + *nonce = ++n; + data[19] = (n); + quarkhash(ostate, data); + tmp_hash7 = (ostate[7]); + + applog(LOG_INFO, "data7 %08lx", + (long unsigned int)data[7]); + + if (unlikely(tmp_hash7 <= Htarg)) { + ((uint32_t *)pdata)[19] = htobe32(n); + *last_nonce = n; + ret = true; + break; + } + + if (unlikely((n >= max_nonce) || thr->work_restart)) { + *last_nonce = n; + break; + } + } + + return ret; +} + + + diff --git a/quarkcoin.h b/quarkcoin.h new file mode 100644 index 00000000..f01839bb --- /dev/null +++ b/quarkcoin.h @@ -0,0 +1,10 @@ +#ifndef QUARKCOIN_H +#define QUARKCOIN_H + +#include "miner.h" + +extern int quarkcoin_test(unsigned char *pdata, const unsigned char *ptarget, + uint32_t nonce); +extern void quarkcoin_regenhash(struct work *work); + +#endif /* QUARKCOIN_H */ diff --git a/sgminer.c b/sgminer.c index dadb7e31..830805e3 100644 --- a/sgminer.c +++ b/sgminer.c @@ -4227,6 +4227,9 @@ void write_config(FILE *fcfg) case KL_DARKCOIN: fprintf(fcfg, DARKCOIN_KERNNAME); break; + case KL_QUARKCOIN: + fprintf(fcfg, QUARKCOIN_KERNNAME); + break; } } @@ -6034,6 +6037,9 @@ static void rebuild_nonce(struct work *work, uint32_t nonce) case KL_DARKCOIN: darkcoin_regenhash(work); break; + case KL_QUARKCOIN: + quarkcoin_regenhash(work); + break; default: scrypt_regenhash(work); break; @@ -6085,7 +6091,6 @@ static void update_work_stats(struct thr_info *thr, struct work *work) mutex_lock(&stats_lock); total_diff1 += work->device_diff; -applog(LOG_DEBUG, "total_diff1: %lf\n", total_diff1); thr->cgpu->diff1 += work->device_diff; work->pool->diff1 += work->device_diff; thr->cgpu->last_device_valid_work = time(NULL); From 60a0e002432aedec757c5f3571a7d268fd56a2fb Mon Sep 17 00:00:00 2001 From: phm Date: Sat, 22 Feb 2014 11:04:06 +0100 Subject: [PATCH 2/2] Added support for quarkcoin difficulty calculation. --- driver-opencl.c | 10 +++++++--- miner.h | 8 +++++++- sgminer.c | 21 +++++++++++---------- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/driver-opencl.c b/driver-opencl.c index a2333c3a..9ba81a31 100644 --- a/driver-opencl.c +++ b/driver-opencl.c @@ -48,7 +48,7 @@ extern bool opt_loginput; extern char *opt_kernel_path; extern int gpur_thr_id; extern bool opt_noadl; -extern bool is_scrypt; +extern enum diff_calc_mode dm_mode; extern void *miner_thread(void *userdata); extern int dev_from_id(int thr_id); @@ -228,8 +228,12 @@ char *set_kernel(char *arg) if (kern == KL_NONE) return "Invalid parameter to set_kernel"; gpus[device++].kernel = kern; -// if (kern >= KL_DARKCOIN) -// is_scrypt = false; + if (kern >= KL_DARKCOIN) + dm_mode = DM_BITCOIN; + else if(kern >= KL_QUARKCOIN) + dm_mode = DM_QUARKCOIN; + else + dm_mode = DM_LITECOIN; while ((nextptr = strtok(NULL, ",")) != NULL) { kern = select_kernel(nextptr); diff --git a/miner.h b/miner.h index 73e717c5..28bbf649 100644 --- a/miner.h +++ b/miner.h @@ -381,8 +381,8 @@ enum cl_kernels { KL_CKOLIVAS, KL_PSW, KL_ZUIKKIS, - KL_DARKCOIN, KL_QUARKCOIN, + KL_DARKCOIN, }; enum dev_reason { @@ -1500,4 +1500,10 @@ extern struct api_data *api_add_diff(struct api_data *root, char *name, double * extern struct api_data *api_add_percent(struct api_data *root, char *name, double *data, bool copy_data); extern struct api_data *api_add_avg(struct api_data *root, char *name, float *data, bool copy_data); +enum diff_calc_mode { + DM_BITCOIN, + DM_QUARKCOIN, + DM_LITECOIN, +}; + #endif /* __MINER_H__ */ diff --git a/sgminer.c b/sgminer.c index 830805e3..a042488d 100644 --- a/sgminer.c +++ b/sgminer.c @@ -299,7 +299,10 @@ struct schedtime { struct schedtime schedstart; struct schedtime schedstop; bool sched_paused; -bool is_scrypt = true; + +#define DM_SELECT(x, y, z) (dm_mode == DM_BITCOIN ? x : (dm_mode == DM_QUARKCOIN ? y : z)) + +enum diff_calc_mode dm_mode = DM_LITECOIN; static bool time_before(struct tm *tm1, struct tm *tm2) { @@ -2959,7 +2962,7 @@ static void calc_diff(struct work *work, double known) else { double d64, dcut64; - d64 = (is_scrypt ? (double)65536 * truediffone : truediffone); + d64 = (double) DM_SELECT(1, 256, 65536) * truediffone; dcut64 = le256todouble(work->target); if (unlikely(!dcut64)) @@ -3576,7 +3579,7 @@ static double share_diff(const struct work *work) double d64, s64; double ret; - d64 = (is_scrypt ? (double)65536 * truediffone : truediffone); + d64 = (double) DM_SELECT(1, 256, 65536) * truediffone; s64 = le256todouble(work->hash); if (unlikely(!s64)) s64 = 0; @@ -3899,7 +3902,7 @@ static void set_blockdiff(const struct work *work) uint8_t pow = work->data[72]; int powdiff = (8 * (0x1d - 3)) - (8 * (pow - 3)); uint32_t diff32 = be32toh(*((uint32_t *)(work->data + 72))) & 0x00FFFFFF; - double numerator = (is_scrypt ? 0xFFFFFFFFULL : 0xFFFFULL) << powdiff; + double numerator = DM_SELECT(0xFFFFULL, 0xFFFFFFULL, 0xFFFFFFFFULL) << powdiff; double ddiff = numerator / (double)diff32; if (unlikely(current_diff != ddiff)) { @@ -5818,7 +5821,7 @@ void set_target(unsigned char *dest_target, double diff) } // FIXME: is target set right? - d64 = (is_scrypt ? (double)65536 * truediffone : truediffone); + d64 = (double) DM_SELECT(1, 256, 65536) * truediffone; d64 /= diff; dcut64 = d64 / bits192; @@ -6064,7 +6067,7 @@ bool test_nonce_diff(struct work *work, uint32_t nonce, double diff) uint64_t *hash64 = (uint64_t *)(work->hash + 24), diff64; rebuild_nonce(work, nonce); - diff64 = (is_scrypt ? 0x0000ffff00000000ULL : 0x00000000ffff0000ULL); + diff64 = DM_SELECT(0x00000000ffff0000ULL, 0x000000ffff000000ULL, 0x0000ffff00000000ULL); diff64 /= diff; return (le64toh(*hash64) <= diff64); @@ -6073,13 +6076,11 @@ bool test_nonce_diff(struct work *work, uint32_t nonce, double diff) static void update_work_stats(struct thr_info *thr, struct work *work) { double test_diff = current_diff; - if (is_scrypt) - test_diff *= 65536; + test_diff *= DM_SELECT(1, 256, 65536); work->share_diff = share_diff(work); - if (is_scrypt) - test_diff *= 65536; + test_diff *= DM_SELECT(1, 256, 65536); if (unlikely(work->share_diff >= test_diff)) { work->block = true;