Browse Source

Merge pull request #282 from ystarnaud/x11mod

Updated x11 and x13 kernels
djm34
ystarnaud 11 years ago
parent
commit
707ddd6c4a
  1. 4
      api.c
  2. 161
      config_parser.c
  3. 7
      config_parser.h
  4. 3
      driver-opencl.c
  5. 8163
      kernel/darkcoin-mod.cl
  6. 504
      kernel/marucoin-mod.cl
  7. 2
      miner.h
  8. 102
      sgminer.c

4
api.c

@ -3585,7 +3585,7 @@ void api(int api_thr_id)
struct sockaddr_in cli; struct sockaddr_in cli;
socklen_t clisiz; socklen_t clisiz;
char cmdbuf[100]; char cmdbuf[100];
char *cmd = NULL, *cmdptr, *cmdsbuf; char *cmd = NULL, *cmdptr, *cmdsbuf = NULL;
char *param; char *param;
bool addrok; bool addrok;
char group; char group;
@ -3593,7 +3593,7 @@ void api(int api_thr_id)
json_t *json_config = NULL; json_t *json_config = NULL;
json_t *json_val; json_t *json_val;
bool isjson; bool isjson;
bool did, isjoin, firstjoin; bool did, isjoin = false, firstjoin;
int i; int i;
SOCKETTYPE *apisock; SOCKETTYPE *apisock;

161
config_parser.c

@ -83,6 +83,7 @@ static struct profile *add_profile()
//default profile name is the profile index //default profile name is the profile index
sprintf(buf, "%d", profile->profile_no); sprintf(buf, "%d", profile->profile_no);
profile->name = strdup(buf); profile->name = strdup(buf);
profile->algorithm.name[0] = '\0';
profiles = (struct profile **)realloc(profiles, sizeof(struct profile *) * (total_profiles + 2)); profiles = (struct profile **)realloc(profiles, sizeof(struct profile *) * (total_profiles + 2));
profiles[total_profiles++] = profile; profiles[total_profiles++] = profile;
@ -146,6 +147,22 @@ static struct profile *get_profile(char *name)
} }
/******* Default profile functions used during config parsing *****/ /******* Default profile functions used during config parsing *****/
char *set_default_algorithm(const char *arg)
{
set_algorithm(&default_profile.algorithm, arg);
applog(LOG_INFO, "Set default algorithm to %s", default_profile.algorithm.name);
return NULL;
}
char *set_default_nfactor(const char *arg)
{
set_algorithm_nfactor(&default_profile.algorithm, (const uint8_t) atoi(arg));
applog(LOG_INFO, "Set algorithm N-factor to %d (N to %d)", default_profile.algorithm.nfactor);
return NULL;
}
char *set_default_devices(const char *arg) char *set_default_devices(const char *arg)
{ {
default_profile.devices = arg; default_profile.devices = arg;
@ -696,19 +713,24 @@ void load_default_profile()
//apply default settings //apply default settings
void apply_defaults() void apply_defaults()
{ {
set_algorithm(&opt_algorithm, default_profile.algorithm.name); //if no algorithm specified, use scrypt as default
if (empty_string(default_profile.algorithm.name))
set_algorithm(&default_profile.algorithm, "scrypt");
if (!empty_string(default_profile.devices)) //by default all unless specified
if (empty_string(default_profile.devices))
default_profile.devices = strdup("all");
set_devices((char *)default_profile.devices); set_devices((char *)default_profile.devices);
if (!empty_string(default_profile.intensity)) //set raw intensity first
set_intensity(default_profile.intensity);
if (!empty_string(default_profile.xintensity))
set_xintensity(default_profile.xintensity);
if (!empty_string(default_profile.rawintensity)) if (!empty_string(default_profile.rawintensity))
set_rawintensity(default_profile.rawintensity); set_rawintensity(default_profile.rawintensity);
//then try xintensity
else if (!empty_string(default_profile.xintensity))
set_xintensity(default_profile.xintensity);
//then try intensity
else if (!empty_string(default_profile.intensity))
set_intensity(default_profile.intensity);
if (!empty_string(default_profile.lookup_gap)) if (!empty_string(default_profile.lookup_gap))
set_lookup_gap((char *)default_profile.lookup_gap); set_lookup_gap((char *)default_profile.lookup_gap);
@ -758,68 +780,167 @@ void apply_pool_profile(struct pool *pool)
{ {
struct profile *profile; struct profile *profile;
//if the pool has a profile set //if the pool has a profile set load it
if(!empty_string(pool->profile)) if(!empty_string(pool->profile))
{ {
applog(LOG_DEBUG, "Loading settings from profile \"%s\" for pool %i", pool->profile, pool->pool_no); applog(LOG_DEBUG, "Loading settings from profile \"%s\" for pool %i", pool->profile, pool->pool_no);
//find profile and apply settings to the pool //find profile and apply settings to the pool
if((profile = get_profile(pool->profile))) if(!(profile = get_profile(pool->profile)))
{
//if not found, remove profile name and use default profile.
applog(LOG_DEBUG, "Profile load failed for pool %i: profile %s not found. Using default profile.", pool->pool_no, pool->profile);
//remove profile name
pool->profile[0] = '\0';
profile = &default_profile;
}
}
//no profile specified in pool, use default profile
else
{ {
applog(LOG_DEBUG, "Loading settings from default_profile for pool %i", pool->profile, pool->pool_no);
profile = &default_profile;
}
//only apply profiles settings not already defined in the pool
//if no algorithm is specified, use profile's or default profile's
if(empty_string(pool->algorithm.name))
{
if(!empty_string(profile->algorithm.name))
pool->algorithm = profile->algorithm; pool->algorithm = profile->algorithm;
else
pool->algorithm = default_profile.algorithm;
}
applog(LOG_DEBUG, "Pool %i Algorithm set to \"%s\"", pool->pool_no, pool->algorithm.name); applog(LOG_DEBUG, "Pool %i Algorithm set to \"%s\"", pool->pool_no, pool->algorithm.name);
if(pool_cmp(pool->devices, default_profile.devices))
{
if(!empty_string(profile->devices))
pool->devices = profile->devices; pool->devices = profile->devices;
else
pool->devices = default_profile.devices;
}
applog(LOG_DEBUG, "Pool %i devices set to \"%s\"", pool->pool_no, pool->devices); applog(LOG_DEBUG, "Pool %i devices set to \"%s\"", pool->pool_no, pool->devices);
if(pool_cmp(pool->lookup_gap, default_profile.lookup_gap))
{
if(!empty_string(profile->lookup_gap))
pool->lookup_gap = profile->lookup_gap; pool->lookup_gap = profile->lookup_gap;
else
pool->lookup_gap = default_profile.lookup_gap;
}
applog(LOG_DEBUG, "Pool %i lookup gap set to \"%s\"", pool->pool_no, pool->lookup_gap); applog(LOG_DEBUG, "Pool %i lookup gap set to \"%s\"", pool->pool_no, pool->lookup_gap);
if(pool_cmp(pool->intensity, default_profile.intensity))
{
if(!empty_string(profile->intensity))
pool->intensity = profile->intensity; pool->intensity = profile->intensity;
else
pool->intensity = default_profile.intensity;
}
applog(LOG_DEBUG, "Pool %i Intensity set to \"%s\"", pool->pool_no, pool->intensity); applog(LOG_DEBUG, "Pool %i Intensity set to \"%s\"", pool->pool_no, pool->intensity);
if(pool_cmp(pool->xintensity, default_profile.xintensity))
{
if(!empty_string(profile->xintensity))
pool->xintensity = profile->xintensity; pool->xintensity = profile->xintensity;
else
pool->xintensity = default_profile.xintensity;
}
applog(LOG_DEBUG, "Pool %i XIntensity set to \"%s\"", pool->pool_no, pool->xintensity); applog(LOG_DEBUG, "Pool %i XIntensity set to \"%s\"", pool->pool_no, pool->xintensity);
if(pool_cmp(pool->rawintensity, default_profile.rawintensity))
{
if(!empty_string(profile->rawintensity))
pool->rawintensity = profile->rawintensity; pool->rawintensity = profile->rawintensity;
else
pool->rawintensity = default_profile.rawintensity;
}
applog(LOG_DEBUG, "Pool %i Raw Intensity set to \"%s\"", pool->pool_no, pool->rawintensity); applog(LOG_DEBUG, "Pool %i Raw Intensity set to \"%s\"", pool->pool_no, pool->rawintensity);
if(pool_cmp(pool->thread_concurrency, default_profile.thread_concurrency))
{
if(!empty_string(profile->thread_concurrency))
pool->thread_concurrency = profile->thread_concurrency; pool->thread_concurrency = profile->thread_concurrency;
else
pool->thread_concurrency = default_profile.thread_concurrency;
}
applog(LOG_DEBUG, "Pool %i Thread Concurrency set to \"%s\"", pool->pool_no, pool->thread_concurrency); applog(LOG_DEBUG, "Pool %i Thread Concurrency set to \"%s\"", pool->pool_no, pool->thread_concurrency);
#ifdef HAVE_ADL #ifdef HAVE_ADL
if(pool_cmp(pool->gpu_engine, default_profile.gpu_engine))
{
if(!empty_string(profile->gpu_engine))
pool->gpu_engine = profile->gpu_engine; pool->gpu_engine = profile->gpu_engine;
else
pool->gpu_engine = default_profile.gpu_engine;
}
applog(LOG_DEBUG, "Pool %i GPU Clock set to \"%s\"", pool->pool_no, pool->gpu_engine); applog(LOG_DEBUG, "Pool %i GPU Clock set to \"%s\"", pool->pool_no, pool->gpu_engine);
if(pool_cmp(pool->gpu_memclock, default_profile.gpu_memclock))
{
if(!empty_string(profile->gpu_memclock))
pool->gpu_memclock = profile->gpu_memclock; pool->gpu_memclock = profile->gpu_memclock;
else
pool->gpu_memclock = default_profile.gpu_memclock;
}
applog(LOG_DEBUG, "Pool %i GPU Memory clock set to \"%s\"", pool->pool_no, pool->gpu_memclock); applog(LOG_DEBUG, "Pool %i GPU Memory clock set to \"%s\"", pool->pool_no, pool->gpu_memclock);
if(pool_cmp(pool->gpu_threads, default_profile.gpu_threads))
{
if(!empty_string(profile->gpu_threads))
pool->gpu_threads = profile->gpu_threads; pool->gpu_threads = profile->gpu_threads;
else
pool->gpu_threads = default_profile.gpu_threads;
}
applog(LOG_DEBUG, "Pool %i GPU Threads set to \"%s\"", pool->pool_no, pool->gpu_threads); applog(LOG_DEBUG, "Pool %i GPU Threads set to \"%s\"", pool->pool_no, pool->gpu_threads);
if(pool_cmp(pool->gpu_fan, default_profile.gpu_fan))
{
if(!empty_string(profile->gpu_fan))
pool->gpu_fan = profile->gpu_fan; pool->gpu_fan = profile->gpu_fan;
else
pool->gpu_fan = default_profile.gpu_fan;
}
applog(LOG_DEBUG, "Pool %i GPU Fan set to \"%s\"", pool->pool_no, pool->gpu_fan); applog(LOG_DEBUG, "Pool %i GPU Fan set to \"%s\"", pool->pool_no, pool->gpu_fan);
if(pool_cmp(pool->gpu_powertune, default_profile.gpu_powertune))
{
if(!empty_string(profile->gpu_powertune))
pool->gpu_powertune = profile->gpu_powertune; pool->gpu_powertune = profile->gpu_powertune;
else
pool->gpu_powertune = default_profile.gpu_powertune;
}
applog(LOG_DEBUG, "Pool %i GPU Powertune set to \"%s\"", pool->pool_no, pool->gpu_powertune); applog(LOG_DEBUG, "Pool %i GPU Powertune set to \"%s\"", pool->pool_no, pool->gpu_powertune);
if(pool_cmp(pool->gpu_vddc, default_profile.gpu_vddc))
{
if(!empty_string(profile->gpu_vddc))
pool->gpu_vddc = profile->gpu_vddc; pool->gpu_vddc = profile->gpu_vddc;
else
pool->gpu_vddc = default_profile.gpu_vddc;
}
applog(LOG_DEBUG, "Pool %i GPU Vddc set to \"%s\"", pool->pool_no, pool->gpu_vddc); applog(LOG_DEBUG, "Pool %i GPU Vddc set to \"%s\"", pool->pool_no, pool->gpu_vddc);
#endif #endif
if(pool_cmp(pool->shaders, default_profile.shaders))
{
if(!empty_string(profile->shaders))
pool->shaders = profile->shaders; pool->shaders = profile->shaders;
else
pool->shaders = default_profile.shaders;
}
applog(LOG_DEBUG, "Pool %i Shaders set to \"%s\"", pool->pool_no, pool->shaders); applog(LOG_DEBUG, "Pool %i Shaders set to \"%s\"", pool->pool_no, pool->shaders);
if(pool_cmp(pool->worksize, default_profile.worksize))
{
if(!empty_string(profile->worksize))
pool->worksize = profile->worksize; pool->worksize = profile->worksize;
applog(LOG_DEBUG, "Pool %i Worksize set to \"%s\"", pool->pool_no, pool->worksize);
}
else else
{ pool->worksize = default_profile.worksize;
applog(LOG_DEBUG, "Profile load failed for pool %i: profile %s not found.", pool->pool_no, pool->profile);
//remove profile name
pool->profile[0] = '\0';
}
} }
applog(LOG_DEBUG, "Pool %i Worksize set to \"%s\"", pool->pool_no, pool->worksize);
} }
//helper function to add json values to pool object //helper function to add json values to pool object
@ -1291,7 +1412,7 @@ void write_config(const char *filename)
else else
{ {
//save algorithm name //save algorithm name
if(json_object_set(config, "algorithm", json_string(opt_algorithm.name)) == -1) if(json_object_set(config, "algorithm", json_string(default_profile.algorithm.name)) == -1)
{ {
applog(LOG_ERR, "Error: config_parser::write_config():\n json_object_set() failed on algorithm"); applog(LOG_ERR, "Error: config_parser::write_config():\n json_object_set() failed on algorithm");
return; return;

7
config_parser.h

@ -12,7 +12,10 @@
#define empty_string(str) ((str && str[0] != '\0')?0:1) #define empty_string(str) ((str && str[0] != '\0')?0:1)
#endif #endif
#ifndef safe_cmp #ifndef safe_cmp
#define safe_cmp(val1, val2) (((val1 && strcmp(val1, val2) != 0) || empty_string(val1))?1:0) #define safe_cmp(val1, val2) (((val1 && strcasecmp(val1, val2) != 0) || empty_string(val1))?1:0)
#endif
#ifndef pool_cmp
#define pool_cmp(val1, val2) (((val1 && val2 && strcasecmp(val1, val2) == 0) || empty_string(val1))?1:0)
#endif #endif
#ifndef isnull #ifndef isnull
#define isnull(str, default_str) ((str == NULL)?default_str:str) #define isnull(str, default_str) ((str == NULL)?default_str:str)
@ -60,6 +63,8 @@ extern struct profile **profiles;
extern int total_profiles; extern int total_profiles;
/* option parser functions */ /* option parser functions */
extern char *set_default_algorithm(const char *arg);
extern char *set_default_nfactor(const char *arg);
extern char *set_default_devices(const char *arg); extern char *set_default_devices(const char *arg);
extern char *set_default_lookup_gap(const char *arg); extern char *set_default_lookup_gap(const char *arg);
extern char *set_default_intensity(const char *arg); extern char *set_default_intensity(const char *arg);

3
driver-opencl.c

@ -28,6 +28,7 @@
#include "compat.h" #include "compat.h"
#include "miner.h" #include "miner.h"
#include "config_parser.h"
#include "driver-opencl.h" #include "driver-opencl.h"
#include "findnonce.h" #include "findnonce.h"
#include "ocl.h" #include "ocl.h"
@ -1161,7 +1162,7 @@ static void opencl_detect(bool hotplug)
cgpu->threads = 1; cgpu->threads = 1;
#endif #endif
cgpu->virtual_gpu = i; cgpu->virtual_gpu = i;
cgpu->algorithm = opt_algorithm; cgpu->algorithm = default_profile.algorithm;
add_cgpu(cgpu); add_cgpu(cgpu);
} }

8163
kernel/darkcoin-mod.cl

File diff suppressed because it is too large Load Diff

504
kernel/marucoin-mod.cl

@ -34,9 +34,9 @@
#define X13MOD_CL #define X13MOD_CL
#if __ENDIAN_LITTLE__ #if __ENDIAN_LITTLE__
#define SPH_LITTLE_ENDIAN 1 #define SPH_LITTLE_ENDIAN 1
#else #else
#define SPH_BIG_ENDIAN 1 #define SPH_BIG_ENDIAN 1
#endif #endif
#define SPH_UPTR sph_u64 #define SPH_UPTR sph_u64
@ -44,24 +44,24 @@
typedef unsigned int sph_u32; typedef unsigned int sph_u32;
typedef int sph_s32; typedef int sph_s32;
#ifndef __OPENCL_VERSION__ #ifndef __OPENCL_VERSION__
typedef unsigned long long sph_u64; typedef unsigned long long sph_u64;
typedef long long sph_s64; typedef long long sph_s64;
#else #else
typedef unsigned long sph_u64; typedef unsigned long sph_u64;
typedef long sph_s64; typedef long sph_s64;
#endif #endif
#define SPH_64 1 #define SPH_64 1
#define SPH_64_TRUE 1 #define SPH_64_TRUE 1
#define SPH_C32(x) ((sph_u32)(x ## U)) #define SPH_C32(x) ((sph_u32)(x ## U))
#define SPH_T32(x) (as_uint(x)) #define SPH_T32(x) ((as_uint(x)) & SPH_C32(0xFFFFFFFF))
#define SPH_ROTL32(x, n) rotate(as_uint(x), as_uint(n)) #define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) #define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n)))
#define SPH_C64(x) ((sph_u64)(x ## UL)) #define SPH_C64(x) ((sph_u64)(x ## UL))
#define SPH_T64(x) (as_ulong(x)) #define SPH_T64(x) ((as_ulong(x)) & SPH_C64(0xFFFFFFFFFFFFFFFF))
#define SPH_ROTL64(x, n) rotate(as_ulong(x), (n) & 0xFFFFFFFFFFFFFFFFUL) #define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) #define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n)))
#define SPH_ECHO_64 1 #define SPH_ECHO_64 1
@ -76,7 +76,7 @@ typedef long sph_s64;
#define SPH_CUBEHASH_UNROLL 0 #define SPH_CUBEHASH_UNROLL 0
#define SPH_KECCAK_UNROLL 0 #define SPH_KECCAK_UNROLL 0
#if !defined SPH_HAMSI_EXPAND_BIG #if !defined SPH_HAMSI_EXPAND_BIG
#define SPH_HAMSI_EXPAND_BIG 4 #define SPH_HAMSI_EXPAND_BIG 4
#endif #endif
#include "blake.cl" #include "blake.cl"
@ -104,6 +104,14 @@ typedef long sph_s64;
#define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x)); #define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x));
#endif #endif
#define SHL(x, n) ((x) << (n))
#define SHR(x, n) ((x) >> (n))
#define CONST_EXP2 q[i+0] + SPH_ROTL64(q[i+1], 5) + q[i+2] + SPH_ROTL64(q[i+3], 11) + \
q[i+4] + SPH_ROTL64(q[i+5], 27) + q[i+6] + SPH_ROTL64(q[i+7], 32) + \
q[i+8] + SPH_ROTL64(q[i+9], 37) + q[i+10] + SPH_ROTL64(q[i+11], 43) + \
q[i+12] + SPH_ROTL64(q[i+13], 53) + (SHR(q[i+14],1) ^ q[i+14]) + (SHR(q[i+15],2) ^ q[i+15])
typedef union { typedef union {
unsigned char h1[64]; unsigned char h1[64];
uint h4[16]; uint h4[16];
@ -125,9 +133,8 @@ __kernel void search(__global unsigned char* block, __global hash_t* hashes)
sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;;
if ((T0 = SPH_T64(T0 + 1024)) < 1024) if ((T0 = SPH_T64(T0 + 1024)) < 1024)
{
T1 = SPH_T64(T1 + 1); T1 = SPH_T64(T1 + 1);
}
sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; sph_u64 M0, M1, M2, M3, M4, M5, M6, M7;
sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; sph_u64 M8, M9, MA, MB, MC, MD, ME, MF;
sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; sph_u64 V0, V1, V2, V3, V4, V5, V6, V7;
@ -170,58 +177,268 @@ __kernel void search1(__global hash_t* hashes)
{ {
uint gid = get_global_id(0); uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]); __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
// bmw // bmw
sph_u64 BMW_H[16]; sph_u64 BMW_H[16];
#pragma unroll 16
for(unsigned u = 0; u < 16; u++) for(unsigned u = 0; u < 16; u++)
BMW_H[u] = BMW_IV512[u]; BMW_H[u] = BMW_IV512[u];
sph_u64 BMW_h1[16], BMW_h2[16]; sph_u64 mv[16],q[32];
sph_u64 mv[16]; sph_u64 tmp;
mv[ 0] = SWAP8(hash->h8[0]); mv[0] = SWAP8(hash->h8[0]);
mv[ 1] = SWAP8(hash->h8[1]); mv[1] = SWAP8(hash->h8[1]);
mv[ 2] = SWAP8(hash->h8[2]); mv[2] = SWAP8(hash->h8[2]);
mv[ 3] = SWAP8(hash->h8[3]); mv[3] = SWAP8(hash->h8[3]);
mv[ 4] = SWAP8(hash->h8[4]); mv[4] = SWAP8(hash->h8[4]);
mv[ 5] = SWAP8(hash->h8[5]); mv[5] = SWAP8(hash->h8[5]);
mv[ 6] = SWAP8(hash->h8[6]); mv[6] = SWAP8(hash->h8[6]);
mv[ 7] = SWAP8(hash->h8[7]); mv[7] = SWAP8(hash->h8[7]);
mv[ 8] = 0x80; mv[8] = 0x80;
mv[ 9] = 0; mv[9] = 0;
mv[10] = 0; mv[10] = 0;
mv[11] = 0; mv[11] = 0;
mv[12] = 0; mv[12] = 0;
mv[13] = 0; mv[13] = 0;
mv[14] = 0; mv[14] = 0;
mv[15] = 0x200; mv[15] = SPH_C64(512);
#define M(x) (mv[x])
#define H(x) (BMW_H[x]) tmp = (mv[5] ^ BMW_H[5]) - (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]) + (mv[14] ^ BMW_H[14]);
#define dH(x) (BMW_h2[x]) q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1];
tmp = (mv[6] ^ BMW_H[6]) - (mv[8] ^ BMW_H[8]) + (mv[11] ^ BMW_H[11]) + (mv[14] ^ BMW_H[14]) - (mv[15] ^ BMW_H[15]);
q[1] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[2];
tmp = (mv[0] ^ BMW_H[0]) + (mv[7] ^ BMW_H[7]) + (mv[9] ^ BMW_H[9]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]);
q[2] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[3];
tmp = (mv[0] ^ BMW_H[0]) - (mv[1] ^ BMW_H[1]) + (mv[8] ^ BMW_H[8]) - (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]);
q[3] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[4];
tmp = (mv[1] ^ BMW_H[1]) + (mv[2] ^ BMW_H[2]) + (mv[9] ^ BMW_H[9]) - (mv[11] ^ BMW_H[11]) - (mv[14] ^ BMW_H[14]);
q[4] = (SHR(tmp, 1) ^ tmp) + BMW_H[5];
tmp = (mv[3] ^ BMW_H[3]) - (mv[2] ^ BMW_H[2]) + (mv[10] ^ BMW_H[10]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]);
q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6];
tmp = (mv[4] ^ BMW_H[4]) - (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) - (mv[11] ^ BMW_H[11]) + (mv[13] ^ BMW_H[13]);
q[6] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[7];
tmp = (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[5] ^ BMW_H[5]) - (mv[12] ^ BMW_H[12]) - (mv[14] ^ BMW_H[14]);
q[7] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[8];
tmp = (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) - (mv[6] ^ BMW_H[6]) + (mv[13] ^ BMW_H[13]) - (mv[15] ^ BMW_H[15]);
q[8] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[9];
tmp = (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) + (mv[6] ^ BMW_H[6]) - (mv[7] ^ BMW_H[7]) + (mv[14] ^ BMW_H[14]);
q[9] = (SHR(tmp, 1) ^ tmp) + BMW_H[10];
tmp = (mv[8] ^ BMW_H[8]) - (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[7] ^ BMW_H[7]) + (mv[15] ^ BMW_H[15]);
q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11];
tmp = (mv[8] ^ BMW_H[8]) - (mv[0] ^ BMW_H[0]) - (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) + (mv[9] ^ BMW_H[9]);
q[11] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[12];
tmp = (mv[1] ^ BMW_H[1]) + (mv[3] ^ BMW_H[3]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[10] ^ BMW_H[10]);
q[12] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[13];
tmp = (mv[2] ^ BMW_H[2]) + (mv[4] ^ BMW_H[4]) + (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[11] ^ BMW_H[11]);
q[13] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[14];
tmp = (mv[3] ^ BMW_H[3]) - (mv[5] ^ BMW_H[5]) + (mv[8] ^ BMW_H[8]) - (mv[11] ^ BMW_H[11]) - (mv[12] ^ BMW_H[12]);
q[14] = (SHR(tmp, 1) ^ tmp) + BMW_H[15];
tmp = (mv[12] ^ BMW_H[12]) - (mv[4] ^ BMW_H[4]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[13] ^ BMW_H[13]);
q[15] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[0];
#pragma unroll 2
for(int i=0;i<2;i++)
{
q[i+16] =
(SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) +
(SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) +
(SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) +
(SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) +
(SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) +
(SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) +
(SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) +
(SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) +
(SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) +
(SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) +
(SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) +
(SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) +
(SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) +
(SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) +
(SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) +
(SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]);
}
#pragma unroll 4
for(int i=2;i<6;i++)
{
q[i+16] = CONST_EXP2 +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]);
}
#pragma unroll 3
for(int i=6;i<9;i++)
{
q[i+16] = CONST_EXP2 +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i+7]);
}
#pragma unroll 4
for(int i=9;i<13;i++)
{
q[i+16] = CONST_EXP2 +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]);
}
#pragma unroll 3
for(int i=13;i<16;i++)
{
q[i+16] = CONST_EXP2 +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]);
}
FOLDb; sph_u64 XL64 = q[16]^q[17]^q[18]^q[19]^q[20]^q[21]^q[22]^q[23];
sph_u64 XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31];
BMW_H[0] = (SHL(XH64, 5) ^ SHR(q[16],5) ^ mv[0]) + ( XL64 ^ q[24] ^ q[0]);
BMW_H[1] = (SHR(XH64, 7) ^ SHL(q[17],8) ^ mv[1]) + ( XL64 ^ q[25] ^ q[1]);
BMW_H[2] = (SHR(XH64, 5) ^ SHL(q[18],5) ^ mv[2]) + ( XL64 ^ q[26] ^ q[2]);
BMW_H[3] = (SHR(XH64, 1) ^ SHL(q[19],5) ^ mv[3]) + ( XL64 ^ q[27] ^ q[3]);
BMW_H[4] = (SHR(XH64, 3) ^ q[20] ^ mv[4]) + ( XL64 ^ q[28] ^ q[4]);
BMW_H[5] = (SHL(XH64, 6) ^ SHR(q[21],6) ^ mv[5]) + ( XL64 ^ q[29] ^ q[5]);
BMW_H[6] = (SHR(XH64, 4) ^ SHL(q[22],6) ^ mv[6]) + ( XL64 ^ q[30] ^ q[6]);
BMW_H[7] = (SHR(XH64,11) ^ SHL(q[23],2) ^ mv[7]) + ( XL64 ^ q[31] ^ q[7]);
BMW_H[8] = SPH_ROTL64(BMW_H[4], 9) + ( XH64 ^ q[24] ^ mv[8]) + (SHL(XL64,8) ^ q[23] ^ q[8]);
BMW_H[9] = SPH_ROTL64(BMW_H[5],10) + ( XH64 ^ q[25] ^ mv[9]) + (SHR(XL64,6) ^ q[16] ^ q[9]);
BMW_H[10] = SPH_ROTL64(BMW_H[6],11) + ( XH64 ^ q[26] ^ mv[10]) + (SHL(XL64,6) ^ q[17] ^ q[10]);
BMW_H[11] = SPH_ROTL64(BMW_H[7],12) + ( XH64 ^ q[27] ^ mv[11]) + (SHL(XL64,4) ^ q[18] ^ q[11]);
BMW_H[12] = SPH_ROTL64(BMW_H[0],13) + ( XH64 ^ q[28] ^ mv[12]) + (SHR(XL64,3) ^ q[19] ^ q[12]);
BMW_H[13] = SPH_ROTL64(BMW_H[1],14) + ( XH64 ^ q[29] ^ mv[13]) + (SHR(XL64,4) ^ q[20] ^ q[13]);
BMW_H[14] = SPH_ROTL64(BMW_H[2],15) + ( XH64 ^ q[30] ^ mv[14]) + (SHR(XL64,7) ^ q[21] ^ q[14]);
BMW_H[15] = SPH_ROTL64(BMW_H[3],16) + ( XH64 ^ q[31] ^ mv[15]) + (SHR(XL64,2) ^ q[22] ^ q[15]);
#pragma unroll 16
for(int i=0;i<16;i++)
{
mv[i] = BMW_H[i];
BMW_H[i] = 0xaaaaaaaaaaaaaaa0ull + (sph_u64)i;
}
#undef M tmp = (mv[5] ^ BMW_H[5]) - (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]) + (mv[14] ^ BMW_H[14]);
#undef H q[0] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[1];
#undef dH tmp = (mv[6] ^ BMW_H[6]) - (mv[8] ^ BMW_H[8]) + (mv[11] ^ BMW_H[11]) + (mv[14] ^ BMW_H[14]) - (mv[15] ^ BMW_H[15]);
q[1] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[2];
tmp = (mv[0] ^ BMW_H[0]) + (mv[7] ^ BMW_H[7]) + (mv[9] ^ BMW_H[9]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]);
q[2] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[3];
tmp = (mv[0] ^ BMW_H[0]) - (mv[1] ^ BMW_H[1]) + (mv[8] ^ BMW_H[8]) - (mv[10] ^ BMW_H[10]) + (mv[13] ^ BMW_H[13]);
q[3] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[4];
tmp = (mv[1] ^ BMW_H[1]) + (mv[2] ^ BMW_H[2]) + (mv[9] ^ BMW_H[9]) - (mv[11] ^ BMW_H[11]) - (mv[14] ^ BMW_H[14]);
q[4] = (SHR(tmp, 1) ^ tmp) + BMW_H[5];
tmp = (mv[3] ^ BMW_H[3]) - (mv[2] ^ BMW_H[2]) + (mv[10] ^ BMW_H[10]) - (mv[12] ^ BMW_H[12]) + (mv[15] ^ BMW_H[15]);
q[5] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[6];
tmp = (mv[4] ^ BMW_H[4]) - (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) - (mv[11] ^ BMW_H[11]) + (mv[13] ^ BMW_H[13]);
q[6] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[7];
tmp = (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[5] ^ BMW_H[5]) - (mv[12] ^ BMW_H[12]) - (mv[14] ^ BMW_H[14]);
q[7] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[8];
tmp = (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) - (mv[6] ^ BMW_H[6]) + (mv[13] ^ BMW_H[13]) - (mv[15] ^ BMW_H[15]);
q[8] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[9];
tmp = (mv[0] ^ BMW_H[0]) - (mv[3] ^ BMW_H[3]) + (mv[6] ^ BMW_H[6]) - (mv[7] ^ BMW_H[7]) + (mv[14] ^ BMW_H[14]);
q[9] = (SHR(tmp, 1) ^ tmp) + BMW_H[10];
tmp = (mv[8] ^ BMW_H[8]) - (mv[1] ^ BMW_H[1]) - (mv[4] ^ BMW_H[4]) - (mv[7] ^ BMW_H[7]) + (mv[15] ^ BMW_H[15]);
q[10] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[11];
tmp = (mv[8] ^ BMW_H[8]) - (mv[0] ^ BMW_H[0]) - (mv[2] ^ BMW_H[2]) - (mv[5] ^ BMW_H[5]) + (mv[9] ^ BMW_H[9]);
q[11] = (SHR(tmp, 1) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 13) ^ SPH_ROTL64(tmp, 43)) + BMW_H[12];
tmp = (mv[1] ^ BMW_H[1]) + (mv[3] ^ BMW_H[3]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[10] ^ BMW_H[10]);
q[12] = (SHR(tmp, 2) ^ SHL(tmp, 1) ^ SPH_ROTL64(tmp, 19) ^ SPH_ROTL64(tmp, 53)) + BMW_H[13];
tmp = (mv[2] ^ BMW_H[2]) + (mv[4] ^ BMW_H[4]) + (mv[7] ^ BMW_H[7]) + (mv[10] ^ BMW_H[10]) + (mv[11] ^ BMW_H[11]);
q[13] = (SHR(tmp, 2) ^ SHL(tmp, 2) ^ SPH_ROTL64(tmp, 28) ^ SPH_ROTL64(tmp, 59)) + BMW_H[14];
tmp = (mv[3] ^ BMW_H[3]) - (mv[5] ^ BMW_H[5]) + (mv[8] ^ BMW_H[8]) - (mv[11] ^ BMW_H[11]) - (mv[12] ^ BMW_H[12]);
q[14] = (SHR(tmp, 1) ^ tmp) + BMW_H[15];
tmp = (mv[12] ^ BMW_H[12]) - (mv[4] ^ BMW_H[4]) - (mv[6] ^ BMW_H[6]) - (mv[9] ^ BMW_H[9]) + (mv[13] ^ BMW_H[13]);
q[15] = (SHR(tmp, 1) ^ SHL(tmp, 3) ^ SPH_ROTL64(tmp, 4) ^ SPH_ROTL64(tmp, 37)) + BMW_H[0];
#pragma unroll 2
for(int i=0;i<2;i++)
{
q[i+16] =
(SHR(q[i], 1) ^ SHL(q[i], 2) ^ SPH_ROTL64(q[i], 13) ^ SPH_ROTL64(q[i], 43)) +
(SHR(q[i+1], 2) ^ SHL(q[i+1], 1) ^ SPH_ROTL64(q[i+1], 19) ^ SPH_ROTL64(q[i+1], 53)) +
(SHR(q[i+2], 2) ^ SHL(q[i+2], 2) ^ SPH_ROTL64(q[i+2], 28) ^ SPH_ROTL64(q[i+2], 59)) +
(SHR(q[i+3], 1) ^ SHL(q[i+3], 3) ^ SPH_ROTL64(q[i+3], 4) ^ SPH_ROTL64(q[i+3], 37)) +
(SHR(q[i+4], 1) ^ SHL(q[i+4], 2) ^ SPH_ROTL64(q[i+4], 13) ^ SPH_ROTL64(q[i+4], 43)) +
(SHR(q[i+5], 2) ^ SHL(q[i+5], 1) ^ SPH_ROTL64(q[i+5], 19) ^ SPH_ROTL64(q[i+5], 53)) +
(SHR(q[i+6], 2) ^ SHL(q[i+6], 2) ^ SPH_ROTL64(q[i+6], 28) ^ SPH_ROTL64(q[i+6], 59)) +
(SHR(q[i+7], 1) ^ SHL(q[i+7], 3) ^ SPH_ROTL64(q[i+7], 4) ^ SPH_ROTL64(q[i+7], 37)) +
(SHR(q[i+8], 1) ^ SHL(q[i+8], 2) ^ SPH_ROTL64(q[i+8], 13) ^ SPH_ROTL64(q[i+8], 43)) +
(SHR(q[i+9], 2) ^ SHL(q[i+9], 1) ^ SPH_ROTL64(q[i+9], 19) ^ SPH_ROTL64(q[i+9], 53)) +
(SHR(q[i+10], 2) ^ SHL(q[i+10], 2) ^ SPH_ROTL64(q[i+10], 28) ^ SPH_ROTL64(q[i+10], 59)) +
(SHR(q[i+11], 1) ^ SHL(q[i+11], 3) ^ SPH_ROTL64(q[i+11], 4) ^ SPH_ROTL64(q[i+11], 37)) +
(SHR(q[i+12], 1) ^ SHL(q[i+12], 2) ^ SPH_ROTL64(q[i+12], 13) ^ SPH_ROTL64(q[i+12], 43)) +
(SHR(q[i+13], 2) ^ SHL(q[i+13], 1) ^ SPH_ROTL64(q[i+13], 19) ^ SPH_ROTL64(q[i+13], 53)) +
(SHR(q[i+14], 2) ^ SHL(q[i+14], 2) ^ SPH_ROTL64(q[i+14], 28) ^ SPH_ROTL64(q[i+14], 59)) +
(SHR(q[i+15], 1) ^ SHL(q[i+15], 3) ^ SPH_ROTL64(q[i+15], 4) ^ SPH_ROTL64(q[i+15], 37)) +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]);
}
#define M(x) (BMW_h2[x]) #pragma unroll 4
#define H(x) (final_b[x]) for(int i=2;i<6;i++)
#define dH(x) (BMW_h1[x]) {
q[i+16] = CONST_EXP2 +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i+10], i+11) ) ^ BMW_H[i+7]);
}
FOLDb; #pragma unroll 3
for(int i=6;i<9;i++)
{
q[i+16] = CONST_EXP2 +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i+7]);
}
#undef M #pragma unroll 4
#undef H for(int i=9;i<13;i++)
#undef dH {
q[i+16] = CONST_EXP2 +
(( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
SPH_ROTL64(mv[i+3], i+4) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]);
}
hash->h8[0] = SWAP8(BMW_h1[8]); #pragma unroll 3
hash->h8[1] = SWAP8(BMW_h1[9]); for(int i=13;i<16;i++)
hash->h8[2] = SWAP8(BMW_h1[10]); {
hash->h8[3] = SWAP8(BMW_h1[11]); q[i+16] = CONST_EXP2 +
hash->h8[4] = SWAP8(BMW_h1[12]); (( ((i+16)*(0x0555555555555555ull)) + SPH_ROTL64(mv[i], i+1) +
hash->h8[5] = SWAP8(BMW_h1[13]); SPH_ROTL64(mv[i-13], (i-13)+1) - SPH_ROTL64(mv[i-6], (i-6)+1) ) ^ BMW_H[i-9]);
hash->h8[6] = SWAP8(BMW_h1[14]); }
hash->h8[7] = SWAP8(BMW_h1[15]);
XL64 = q[16]^q[17]^q[18]^q[19]^q[20]^q[21]^q[22]^q[23];
XH64 = XL64^q[24]^q[25]^q[26]^q[27]^q[28]^q[29]^q[30]^q[31];
BMW_H[0] = (SHL(XH64, 5) ^ SHR(q[16],5) ^ mv[0]) + ( XL64 ^ q[24] ^ q[0]);
BMW_H[1] = (SHR(XH64, 7) ^ SHL(q[17],8) ^ mv[1]) + ( XL64 ^ q[25] ^ q[1]);
BMW_H[2] = (SHR(XH64, 5) ^ SHL(q[18],5) ^ mv[2]) + ( XL64 ^ q[26] ^ q[2]);
BMW_H[3] = (SHR(XH64, 1) ^ SHL(q[19],5) ^ mv[3]) + ( XL64 ^ q[27] ^ q[3]);
BMW_H[4] = (SHR(XH64, 3) ^ q[20] ^ mv[4]) + ( XL64 ^ q[28] ^ q[4]);
BMW_H[5] = (SHL(XH64, 6) ^ SHR(q[21],6) ^ mv[5]) + ( XL64 ^ q[29] ^ q[5]);
BMW_H[6] = (SHR(XH64, 4) ^ SHL(q[22],6) ^ mv[6]) + ( XL64 ^ q[30] ^ q[6]);
BMW_H[7] = (SHR(XH64,11) ^ SHL(q[23],2) ^ mv[7]) + ( XL64 ^ q[31] ^ q[7]);
BMW_H[8] = SPH_ROTL64(BMW_H[4], 9) + ( XH64 ^ q[24] ^ mv[8]) + (SHL(XL64,8) ^ q[23] ^ q[8]);
BMW_H[9] = SPH_ROTL64(BMW_H[5],10) + ( XH64 ^ q[25] ^ mv[9]) + (SHR(XL64,6) ^ q[16] ^ q[9]);
BMW_H[10] = SPH_ROTL64(BMW_H[6],11) + ( XH64 ^ q[26] ^ mv[10]) + (SHL(XL64,6) ^ q[17] ^ q[10]);
BMW_H[11] = SPH_ROTL64(BMW_H[7],12) + ( XH64 ^ q[27] ^ mv[11]) + (SHL(XL64,4) ^ q[18] ^ q[11]);
BMW_H[12] = SPH_ROTL64(BMW_H[0],13) + ( XH64 ^ q[28] ^ mv[12]) + (SHR(XL64,3) ^ q[19] ^ q[12]);
BMW_H[13] = SPH_ROTL64(BMW_H[1],14) + ( XH64 ^ q[29] ^ mv[13]) + (SHR(XL64,4) ^ q[20] ^ q[13]);
BMW_H[14] = SPH_ROTL64(BMW_H[2],15) + ( XH64 ^ q[30] ^ mv[14]) + (SHR(XL64,7) ^ q[21] ^ q[14]);
BMW_H[15] = SPH_ROTL64(BMW_H[3],16) + ( XH64 ^ q[31] ^ mv[15]) + (SHR(XL64,2) ^ q[22] ^ q[15]);
hash->h8[0] = SWAP8(BMW_H[8]);
hash->h8[1] = SWAP8(BMW_H[9]);
hash->h8[2] = SWAP8(BMW_H[10]);
hash->h8[3] = SWAP8(BMW_H[11]);
hash->h8[4] = SWAP8(BMW_H[12]);
hash->h8[5] = SWAP8(BMW_H[13]);
hash->h8[6] = SWAP8(BMW_H[14]);
hash->h8[7] = SWAP8(BMW_H[15]);
barrier(CLK_GLOBAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE);
} }
@ -232,43 +449,53 @@ __kernel void search2(__global hash_t* hashes)
uint gid = get_global_id(0); uint gid = get_global_id(0);
__global hash_t *hash = &(hashes[gid-get_global_offset(0)]); __global hash_t *hash = &(hashes[gid-get_global_offset(0)]);
__local sph_u64 T0_L[256], T1_L[256], T2_L[256], T3_L[256], T4_L[256], T5_L[256], T6_L[256], T7_L[256]; #if !SPH_SMALL_FOOTPRINT_GROESTL
__local sph_u64 T0_C[256], T1_C[256], T2_C[256], T3_C[256];
__local sph_u64 T4_C[256], T5_C[256], T6_C[256], T7_C[256];
#else
__local sph_u64 T0_C[256], T4_C[256];
#endif
int init = get_local_id(0); int init = get_local_id(0);
int step = get_local_size(0); int step = get_local_size(0);
for (int i = init; i < 256; i += step) for (int i = init; i < 256; i += step)
{ {
T0_L[i] = T0[i]; T0_C[i] = T0[i];
T1_L[i] = T1[i]; T4_C[i] = T4[i];
T2_L[i] = T2[i]; #if !SPH_SMALL_FOOTPRINT_GROESTL
T3_L[i] = T3[i]; T1_C[i] = T1[i];
T4_L[i] = T4[i]; T2_C[i] = T2[i];
T5_L[i] = T5[i]; T3_C[i] = T3[i];
T6_L[i] = T6[i]; T5_C[i] = T5[i];
T7_L[i] = T7[i]; T6_C[i] = T6[i];
T7_C[i] = T7[i];
#endif
} }
barrier(CLK_LOCAL_MEM_FENCE);
#define T0 T0_L barrier(CLK_LOCAL_MEM_FENCE); // groestl
#define T1 T1_L
#define T2 T2_L #define T0 T0_C
#define T3 T3_L #define T1 T1_C
#define T4 T4_L #define T2 T2_C
#define T5 T5_L #define T3 T3_C
#define T6 T6_L #define T4 T4_C
#define T7 T7_L #define T5 T5_C
#define T6 T6_C
#define T7 T7_C
// groestl // groestl
sph_u64 H[16]; sph_u64 H[16];
for (unsigned int u = 0; u < 15; u ++) for (unsigned int u = 0; u < 15; u ++)
H[u] = 0; H[u] = 0;
#if USE_LE
#if USE_LE
H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40);
#else #else
H[15] = (sph_u64)512; H[15] = (sph_u64)512;
#endif #endif
sph_u64 g[16], m[16]; sph_u64 g[16], m[16];
m[0] = DEC64E(hash->h8[0]); m[0] = DEC64E(hash->h8[0]);
@ -279,8 +506,10 @@ __kernel void search2(__global hash_t* hashes)
m[5] = DEC64E(hash->h8[5]); m[5] = DEC64E(hash->h8[5]);
m[6] = DEC64E(hash->h8[6]); m[6] = DEC64E(hash->h8[6]);
m[7] = DEC64E(hash->h8[7]); m[7] = DEC64E(hash->h8[7]);
for (unsigned int u = 0; u < 16; u ++) for (unsigned int u = 0; u < 16; u ++)
g[u] = m[u] ^ H[u]; g[u] = m[u] ^ H[u];
m[8] = 0x80; g[8] = m[8] ^ H[8]; m[8] = 0x80; g[8] = m[8] ^ H[8];
m[9] = 0; g[9] = m[9] ^ H[9]; m[9] = 0; g[9] = m[9] ^ H[9];
m[10] = 0; g[10] = m[10] ^ H[10]; m[10] = 0; g[10] = m[10] ^ H[10];
@ -289,16 +518,23 @@ __kernel void search2(__global hash_t* hashes)
m[13] = 0; g[13] = m[13] ^ H[13]; m[13] = 0; g[13] = m[13] ^ H[13];
m[14] = 0; g[14] = m[14] ^ H[14]; m[14] = 0; g[14] = m[14] ^ H[14];
m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; m[15] = 0x100000000000000; g[15] = m[15] ^ H[15];
PERM_BIG_P(g); PERM_BIG_P(g);
PERM_BIG_Q(m); PERM_BIG_Q(m);
for (unsigned int u = 0; u < 16; u ++) for (unsigned int u = 0; u < 16; u ++)
H[u] ^= g[u] ^ m[u]; H[u] ^= g[u] ^ m[u];
sph_u64 xH[16]; sph_u64 xH[16];
for (unsigned int u = 0; u < 16; u ++) for (unsigned int u = 0; u < 16; u ++)
xH[u] = H[u]; xH[u] = H[u];
PERM_BIG_P(xH); PERM_BIG_P(xH);
for (unsigned int u = 0; u < 16; u ++) for (unsigned int u = 0; u < 16; u ++)
H[u] ^= xH[u]; H[u] ^= xH[u];
for (unsigned int u = 0; u < 8; u ++) for (unsigned int u = 0; u < 8; u ++)
hash->h8[u] = DEC64E(H[u + 8]); hash->h8[u] = DEC64E(H[u + 8]);
@ -355,7 +591,8 @@ __kernel void search4(__global hash_t* hashes)
for(int i = 0; i < 2; i++) for(int i = 0; i < 2; i++)
{ {
if (i == 0) { if (i == 0)
{
h0h ^= DEC64E(hash->h8[0]); h0h ^= DEC64E(hash->h8[0]);
h0l ^= DEC64E(hash->h8[1]); h0l ^= DEC64E(hash->h8[1]);
h1h ^= DEC64E(hash->h8[2]); h1h ^= DEC64E(hash->h8[2]);
@ -364,7 +601,9 @@ __kernel void search4(__global hash_t* hashes)
h2l ^= DEC64E(hash->h8[5]); h2l ^= DEC64E(hash->h8[5]);
h3h ^= DEC64E(hash->h8[6]); h3h ^= DEC64E(hash->h8[6]);
h3l ^= DEC64E(hash->h8[7]); h3l ^= DEC64E(hash->h8[7]);
} else if(i == 1) { }
else if(i == 1)
{
h4h ^= DEC64E(hash->h8[0]); h4h ^= DEC64E(hash->h8[0]);
h4l ^= DEC64E(hash->h8[1]); h4l ^= DEC64E(hash->h8[1]);
h5h ^= DEC64E(hash->h8[2]); h5h ^= DEC64E(hash->h8[2]);
@ -471,7 +710,8 @@ __kernel void search6(__global hash_t* hashes)
MI5; MI5;
LUFFA_P5; LUFFA_P5;
if(i == 0) { if(i == 0)
{
M0 = hash->h4[9]; M0 = hash->h4[9];
M1 = hash->h4[8]; M1 = hash->h4[8];
M2 = hash->h4[11]; M2 = hash->h4[11];
@ -480,12 +720,16 @@ __kernel void search6(__global hash_t* hashes)
M5 = hash->h4[12]; M5 = hash->h4[12];
M6 = hash->h4[15]; M6 = hash->h4[15];
M7 = hash->h4[14]; M7 = hash->h4[14];
} else if(i == 1) { }
else if(i == 1)
{
M0 = 0x80000000; M0 = 0x80000000;
M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0;
} else if(i == 2) { }
else if(i == 2)
M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0;
} else if(i == 3) { else if(i == 3)
{
hash->h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40; hash->h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40;
hash->h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41; hash->h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41;
hash->h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ V42; hash->h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ V42;
@ -535,10 +779,12 @@ __kernel void search7(__global hash_t* hashes)
x6 ^= SWAP4(hash->h4[7]); x6 ^= SWAP4(hash->h4[7]);
x7 ^= SWAP4(hash->h4[6]); x7 ^= SWAP4(hash->h4[6]);
for (int i = 0; i < 13; i ++) { for (int i = 0; i < 13; i ++)
{
SIXTEEN_ROUNDS; SIXTEEN_ROUNDS;
if (i == 0) { if (i == 0)
{
x0 ^= SWAP4(hash->h4[9]); x0 ^= SWAP4(hash->h4[9]);
x1 ^= SWAP4(hash->h4[8]); x1 ^= SWAP4(hash->h4[8]);
x2 ^= SWAP4(hash->h4[11]); x2 ^= SWAP4(hash->h4[11]);
@ -547,12 +793,12 @@ __kernel void search7(__global hash_t* hashes)
x5 ^= SWAP4(hash->h4[12]); x5 ^= SWAP4(hash->h4[12]);
x6 ^= SWAP4(hash->h4[15]); x6 ^= SWAP4(hash->h4[15]);
x7 ^= SWAP4(hash->h4[14]); x7 ^= SWAP4(hash->h4[14]);
} else if(i == 1) { }
else if(i == 1)
x0 ^= 0x80; x0 ^= 0x80;
} else if (i == 2) { else if (i == 2)
xv ^= SPH_C32(1); xv ^= SPH_C32(1);
} }
}
hash->h4[0] = x0; hash->h4[0] = x0;
hash->h4[1] = x1; hash->h4[1] = x1;
@ -662,8 +908,10 @@ __kernel void search9(__global hash_t* hashes)
// simd // simd
s32 q[256]; s32 q[256];
unsigned char x[128]; unsigned char x[128];
for(unsigned int i = 0; i < 64; i++) for(unsigned int i = 0; i < 64; i++)
x[i] = hash->h1[i]; x[i] = hash->h1[i];
for(unsigned int i = 64; i < 128; i++) for(unsigned int i = 64; i < 128; i++)
x[i] = 0; x[i] = 0;
@ -796,9 +1044,8 @@ __kernel void search10(__global hash_t* hashes)
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
for (int i = 0; i < 8; i++) { for (int i = 0; i < 8; i++)
hash.h8[i] = hashes[gid-offset].h8[i]; hash.h8[i] = hashes[gid-offset].h8[i];
}
// echo // echo
sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1; sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1;
@ -844,9 +1091,8 @@ __kernel void search10(__global hash_t* hashes)
WF0 = 0x200; WF0 = 0x200;
WF1 = 0; WF1 = 0;
for (unsigned u = 0; u < 10; u ++) { for (unsigned u = 0; u < 10; u ++)
BIG_ROUND; BIG_ROUND;
}
hashp->h8[0] = hash.h8[0] ^ Vb00 ^ W00 ^ W80; hashp->h8[0] = hash.h8[0] ^ Vb00 ^ W00 ^ W80;
hashp->h8[1] = hash.h8[1] ^ Vb01 ^ W01 ^ W81; hashp->h8[1] = hash.h8[1] ^ Vb01 ^ W01 ^ W81;
@ -865,8 +1111,7 @@ __kernel void search11(__global hash_t* hashes)
{ {
uint gid = get_global_id(0); uint gid = get_global_id(0);
uint offset = get_global_offset(0); uint offset = get_global_offset(0);
hash_t hash; __global hash_t *hash = &(hashes[gid-offset]);
__global hash_t *hashp = &(hashes[gid-offset]);
sph_u32 c0 = HAMSI_IV512[0], c1 = HAMSI_IV512[1], c2 = HAMSI_IV512[2], c3 = HAMSI_IV512[3]; sph_u32 c0 = HAMSI_IV512[0], c1 = HAMSI_IV512[1], c2 = HAMSI_IV512[2], c3 = HAMSI_IV512[3];
sph_u32 c4 = HAMSI_IV512[4], c5 = HAMSI_IV512[5], c6 = HAMSI_IV512[6], c7 = HAMSI_IV512[7]; sph_u32 c4 = HAMSI_IV512[4], c5 = HAMSI_IV512[5], c6 = HAMSI_IV512[6], c7 = HAMSI_IV512[7];
@ -876,29 +1121,28 @@ __kernel void search11(__global hash_t* hashes)
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF; sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
sph_u32 h[16] = { c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, cA, cB, cC, cD, cE, cF }; sph_u32 h[16] = { c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, cA, cB, cC, cD, cE, cF };
for (int i = 0; i < 8; i++) { #define buf(u) hash->h1[i + u]
hash.h8[i] = hashes[gid-offset].h8[i]; for(int i = 0; i < 64; i += 8)
} {
#define buf(u) hash.h1[i + u]
for(int i = 0; i < 64; i += 8) {
INPUT_BIG; INPUT_BIG;
P_BIG; P_BIG;
T_BIG; T_BIG;
} }
#undef buf #undef buf
#define buf(u) (u == 0 ? 0x80 : 0)
#define buf(u) (u == 0 ? 0x80 : 0)
INPUT_BIG; INPUT_BIG;
P_BIG; P_BIG;
T_BIG; T_BIG;
#undef buf #undef buf
#define buf(u) (u == 6 ? 2 : 0)
#define buf(u) (u == 6 ? 2 : 0)
INPUT_BIG; INPUT_BIG;
PF_BIG; PF_BIG;
T_BIG; T_BIG;
for (unsigned u = 0; u < 16; u ++) for(unsigned u = 0; u < 16; u ++)
hashp->h4[u] = h[u]; hash->h4[u] = h[u];
barrier(CLK_GLOBAL_MEM_FENCE); barrier(CLK_GLOBAL_MEM_FENCE);
} }
@ -908,12 +1152,7 @@ __kernel void search12(__global hash_t* hashes, __global uint* output, const ulo
{ {
uint gid = get_global_id(0); uint gid = get_global_id(0);
uint offset = get_global_offset(0); uint offset = get_global_offset(0);
hash_t hash; __global hash_t *hash = &(hashes[gid-offset]);
__global hash_t *hashp = &(hashes[gid-offset]);
for (int i = 0; i < 8; i++) {
hash.h8[i] = hashes[gid-offset].h8[i];
}
// fugue // fugue
sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09; sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09;
@ -929,22 +1168,25 @@ __kernel void search12(__global hash_t* hashes, __global uint* output, const ulo
S28 = SPH_C32(0xaac6e2c9); S29 = SPH_C32(0xddb21398); S30 = SPH_C32(0xcae65838); S31 = SPH_C32(0x437f203f); S28 = SPH_C32(0xaac6e2c9); S29 = SPH_C32(0xddb21398); S30 = SPH_C32(0xcae65838); S31 = SPH_C32(0x437f203f);
S32 = SPH_C32(0x25ea78e7); S33 = SPH_C32(0x951fddd6); S34 = SPH_C32(0xda6ed11d); S35 = SPH_C32(0xe13e3567); S32 = SPH_C32(0x25ea78e7); S33 = SPH_C32(0x951fddd6); S34 = SPH_C32(0xda6ed11d); S35 = SPH_C32(0xe13e3567);
FUGUE512_3((hash.h4[0x0]), (hash.h4[0x1]), (hash.h4[0x2])); FUGUE512_3((hash->h4[0x0]), (hash->h4[0x1]), (hash->h4[0x2]));
FUGUE512_3((hash.h4[0x3]), (hash.h4[0x4]), (hash.h4[0x5])); FUGUE512_3((hash->h4[0x3]), (hash->h4[0x4]), (hash->h4[0x5]));
FUGUE512_3((hash.h4[0x6]), (hash.h4[0x7]), (hash.h4[0x8])); FUGUE512_3((hash->h4[0x6]), (hash->h4[0x7]), (hash->h4[0x8]));
FUGUE512_3((hash.h4[0x9]), (hash.h4[0xA]), (hash.h4[0xB])); FUGUE512_3((hash->h4[0x9]), (hash->h4[0xA]), (hash->h4[0xB]));
FUGUE512_3((hash.h4[0xC]), (hash.h4[0xD]), (hash.h4[0xE])); FUGUE512_3((hash->h4[0xC]), (hash->h4[0xD]), (hash->h4[0xE]));
FUGUE512_3((hash.h4[0xF]), as_uint2(fc_bit_count).y, as_uint2(fc_bit_count).x); FUGUE512_3((hash->h4[0xF]), as_uint2(fc_bit_count).y, as_uint2(fc_bit_count).x);
// apply round shift if necessary // apply round shift if necessary
int i; int i;
for (i = 0; i < 32; i ++) { for (i = 0; i < 32; i ++)
{
ROR3; ROR3;
CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20); CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20);
SMIX(S00, S01, S02, S03); SMIX(S00, S01, S02, S03);
} }
for (i = 0; i < 13; i ++) {
for (i = 0; i < 13; i ++)
{
S04 ^= S00; S04 ^= S00;
S09 ^= S00; S09 ^= S00;
S18 ^= S00; S18 ^= S00;
@ -975,24 +1217,24 @@ __kernel void search12(__global hash_t* hashes, __global uint* output, const ulo
S18 ^= S00; S18 ^= S00;
S27 ^= S00; S27 ^= S00;
hash.h4[0] = SWAP4(S01); hash->h4[0] = SWAP4(S01);
hash.h4[1] = SWAP4(S02); hash->h4[1] = SWAP4(S02);
hash.h4[2] = SWAP4(S03); hash->h4[2] = SWAP4(S03);
hash.h4[3] = SWAP4(S04); hash->h4[3] = SWAP4(S04);
hash.h4[4] = SWAP4(S09); hash->h4[4] = SWAP4(S09);
hash.h4[5] = SWAP4(S10); hash->h4[5] = SWAP4(S10);
hash.h4[6] = SWAP4(S11); hash->h4[6] = SWAP4(S11);
hash.h4[7] = SWAP4(S12); hash->h4[7] = SWAP4(S12);
hash.h4[8] = SWAP4(S18); hash->h4[8] = SWAP4(S18);
hash.h4[9] = SWAP4(S19); hash->h4[9] = SWAP4(S19);
hash.h4[10] = SWAP4(S20); hash->h4[10] = SWAP4(S20);
hash.h4[11] = SWAP4(S21); hash->h4[11] = SWAP4(S21);
hash.h4[12] = SWAP4(S27); hash->h4[12] = SWAP4(S27);
hash.h4[13] = SWAP4(S28); hash->h4[13] = SWAP4(S28);
hash.h4[14] = SWAP4(S29); hash->h4[14] = SWAP4(S29);
hash.h4[15] = SWAP4(S30); hash->h4[15] = SWAP4(S30);
bool result = (hash.h8[3] <= target); bool result = (hash->h8[3] <= target);
if (result) if (result)
output[atomic_inc(output+0xFF)] = SWAP4(gid); output[atomic_inc(output+0xFF)] = SWAP4(gid);

2
miner.h

@ -1008,7 +1008,7 @@ extern int opt_queue;
extern int opt_scantime; extern int opt_scantime;
extern int opt_expiry; extern int opt_expiry;
extern algorithm_t opt_algorithm; //extern algorithm_t opt_algorithm;
extern cglock_t control_lock; extern cglock_t control_lock;
extern pthread_mutex_t hash_lock; extern pthread_mutex_t hash_lock;

102
sgminer.c

@ -91,7 +91,7 @@ int opt_queue = 1;
int opt_scantime = 7; int opt_scantime = 7;
int opt_expiry = 28; int opt_expiry = 28;
algorithm_t opt_algorithm; //algorithm_t opt_algorithm;
unsigned long long global_hashrate; unsigned long long global_hashrate;
unsigned long global_quota_gcd = 1; unsigned long global_quota_gcd = 1;
@ -513,9 +513,7 @@ struct pool *add_pool(void)
buf[0] = '\0'; buf[0] = '\0';
pool->name = strdup(buf); pool->name = strdup(buf);
pool->profile = strdup(buf); //profile blank by default pool->profile = strdup(buf); //profile blank by default
pool->algorithm.name[0] = '\0'; //blank algorithm name
/* Algorithm */
pool->algorithm = opt_algorithm;
pools = (struct pool **)realloc(pools, sizeof(struct pool *) * (total_pools + 2)); pools = (struct pool **)realloc(pools, sizeof(struct pool *) * (total_pools + 2));
pools[total_pools++] = pool; pools[total_pools++] = pool;
@ -637,7 +635,7 @@ char *set_devices(char *arg)
int i, val1 = 0, val2 = 0; int i, val1 = 0, val2 = 0;
char *nextptr; char *nextptr;
if (*arg) if(*arg)
{ {
if (*arg == '?') if (*arg == '?')
{ {
@ -645,9 +643,9 @@ char *set_devices(char *arg)
return NULL; return NULL;
} }
//all devices enabled //all devices enabled
else if(*arg == '*') else if(*arg == '*' || !strcasecmp(arg, "all"))
{ {
applog(LOG_DEBUG, "set_devices(%s)", arg); applog(LOG_DEBUG, "set_devices(all)");
opt_devs_enabled = 0; opt_devs_enabled = 0;
return NULL; return NULL;
} }
@ -1165,31 +1163,6 @@ static void load_temp_cutoffs()
} }
} }
static char *set_algo(const char *arg)
{
if ((json_array_index < 0) || (total_pools == 0)) {
set_algorithm(&opt_algorithm, arg);
applog(LOG_INFO, "Set default algorithm to %s", opt_algorithm.name);
} else {
set_pool_algorithm(arg);
}
return NULL;
}
static char *set_nfactor(const char *arg)
{
if ((json_array_index < 0) || (total_pools == 0)) {
set_algorithm_nfactor(&opt_algorithm, (const uint8_t) atoi(arg));
applog(LOG_INFO, "Set algorithm N-factor to %d (N to %d)",
opt_algorithm.nfactor, opt_algorithm.n);
} else {
set_pool_nfactor(arg);
}
return NULL;
}
static char *set_api_allow(const char *arg) static char *set_api_allow(const char *arg)
{ {
opt_set_charp(arg, &opt_api_allow); opt_set_charp(arg, &opt_api_allow);
@ -1251,7 +1224,7 @@ char *set_difficulty_multiplier(char *arg)
/* These options are available from config file or commandline */ /* These options are available from config file or commandline */
struct opt_table opt_config_table[] = { struct opt_table opt_config_table[] = {
OPT_WITH_ARG("--algorithm", OPT_WITH_ARG("--algorithm",
set_algo, NULL, NULL, set_default_algorithm, NULL, NULL,
"Set mining algorithm and most common defaults, default: scrypt"), "Set mining algorithm and most common defaults, default: scrypt"),
OPT_WITH_ARG("--api-allow", OPT_WITH_ARG("--api-allow",
set_api_allow, NULL, NULL, set_api_allow, NULL, NULL,
@ -1437,7 +1410,7 @@ struct opt_table opt_config_table[] = {
opt_set_bool, &opt_delaynet, opt_set_bool, &opt_delaynet,
"Impose small delays in networking to not overload slow routers"), "Impose small delays in networking to not overload slow routers"),
OPT_WITH_ARG("--nfactor", OPT_WITH_ARG("--nfactor",
set_nfactor, NULL, NULL, set_default_nfactor, NULL, NULL,
"Override default scrypt N-factor parameter."), "Override default scrypt N-factor parameter."),
#ifdef HAVE_ADL #ifdef HAVE_ADL
OPT_WITHOUT_ARG("--no-adl", OPT_WITHOUT_ARG("--no-adl",
@ -5925,8 +5898,10 @@ static void get_work_prepare_thread(struct thr_info *mythr, struct work *work)
bool soft_restart = !work->pool->gpu_threads; bool soft_restart = !work->pool->gpu_threads;
rd_lock(&mining_thr_lock); rd_lock(&mining_thr_lock);
// Shutdown all threads first (necessary) // Shutdown all threads first (necessary)
if (soft_restart) { if (soft_restart)
for (i = 0; i < start_threads; i++) { {
for (i = 0; i < start_threads; i++)
{
struct thr_info *thr = mining_thr[i]; struct thr_info *thr = mining_thr[i];
thr->cgpu->drv->thread_shutdown(thr); thr->cgpu->drv->thread_shutdown(thr);
} }
@ -5955,23 +5930,38 @@ static void get_work_prepare_thread(struct thr_info *mythr, struct work *work)
else if(!empty_string(default_profile.lookup_gap)) else if(!empty_string(default_profile.lookup_gap))
set_lookup_gap((char *)default_profile.lookup_gap); set_lookup_gap((char *)default_profile.lookup_gap);
//intensity //raw intensity from pool
if(!empty_string(work->pool->intensity))
set_intensity(work->pool->intensity);
else if(!empty_string(default_profile.intensity))
set_intensity(default_profile.intensity);
//xintensity
if (!empty_string(work->pool->xintensity))
set_xintensity(work->pool->xintensity);
else if(!empty_string(default_profile.xintensity))
set_xintensity(default_profile.xintensity);
//raw intensity
if (!empty_string(work->pool->rawintensity)) if (!empty_string(work->pool->rawintensity))
set_rawintensity(work->pool->rawintensity); set_rawintensity(work->pool->rawintensity);
//raw intensity from default profile
else if(!empty_string(default_profile.rawintensity)) else if(!empty_string(default_profile.rawintensity))
set_rawintensity(default_profile.rawintensity); set_rawintensity(default_profile.rawintensity);
//if no rawintensity is set try xintensity
else
{
//xintensity from pool
if (!empty_string(work->pool->xintensity))
set_xintensity(work->pool->xintensity);
//xintensity from default profile
else if(!empty_string(default_profile.xintensity))
set_xintensity(default_profile.xintensity);
//no xintensity set try intensity
else
{
//intensity from pool
if(!empty_string(work->pool->intensity))
set_intensity(work->pool->intensity);
//intensity from defaults
else if(!empty_string(default_profile.intensity))
set_intensity(default_profile.intensity);
//nothing set anywhere, use 8
else
{
default_profile.intensity = strdup("8");
set_intensity(default_profile.intensity);
}
}
}
//shaders //shaders
if (!empty_string(work->pool->shaders)) if (!empty_string(work->pool->shaders))
@ -5985,7 +5975,7 @@ static void get_work_prepare_thread(struct thr_info *mythr, struct work *work)
else if(!empty_string(default_profile.worksize)) else if(!empty_string(default_profile.worksize))
set_worksize((char *)default_profile.worksize); set_worksize((char *)default_profile.worksize);
#ifdef HAVE_ADL #ifdef HAVE_ADL
//GPU clock //GPU clock
if(!empty_string(work->pool->gpu_engine)) if(!empty_string(work->pool->gpu_engine))
set_gpu_engine(work->pool->gpu_engine); set_gpu_engine(work->pool->gpu_engine);
@ -6025,14 +6015,15 @@ static void get_work_prepare_thread(struct thr_info *mythr, struct work *work)
set_powertune(i, gpus[i].gpu_powertune); set_powertune(i, gpus[i].gpu_powertune);
set_vddc(i, gpus[i].gpu_vddc); set_vddc(i, gpus[i].gpu_vddc);
} }
#endif #endif
// Change algorithm for each thread (thread_prepare calls initCl) // Change algorithm for each thread (thread_prepare calls initCl)
for (i = 0; i < start_threads; i++) for (i = 0; i < start_threads; i++)
{ {
struct thr_info *thr = mining_thr[i]; struct thr_info *thr = mining_thr[i];
thr->cgpu->algorithm = work->pool->algorithm; thr->cgpu->algorithm = work->pool->algorithm;
if (soft_restart) { if (soft_restart)
{
thr->cgpu->drv->thread_prepare(thr); thr->cgpu->drv->thread_prepare(thr);
thr->cgpu->drv->thread_init(thr); thr->cgpu->drv->thread_init(thr);
} }
@ -6040,6 +6031,7 @@ static void get_work_prepare_thread(struct thr_info *mythr, struct work *work)
// Necessary because algorithms can have dramatically different diffs // Necessary because algorithms can have dramatically different diffs
thr->cgpu->drv->working_diff = 1; thr->cgpu->drv->working_diff = 1;
} }
rd_unlock(&mining_thr_lock); rd_unlock(&mining_thr_lock);
// Finish switching pools // Finish switching pools
@ -6111,7 +6103,9 @@ static void get_work_prepare_thread(struct thr_info *mythr, struct work *work)
pthread_cond_broadcast(&algo_switch_wait_cond); pthread_cond_broadcast(&algo_switch_wait_cond);
mutex_unlock(&algo_switch_wait_lock); mutex_unlock(&algo_switch_wait_lock);
// Not all threads are waiting, join the waiting list // Not all threads are waiting, join the waiting list
} else { }
else
{
mutex_unlock(&algo_switch_lock); mutex_unlock(&algo_switch_lock);
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
// Wait for signal to start working again // Wait for signal to start working again
@ -7785,7 +7779,7 @@ int main(int argc, char *argv[])
#endif #endif
/* Default algorithm specified in algorithm.c ATM */ /* Default algorithm specified in algorithm.c ATM */
set_algorithm(&opt_algorithm, "scrypt"); set_algorithm(&default_profile.algorithm, "scrypt");
devcursor = 8; devcursor = 8;
logstart = devcursor + 1; logstart = devcursor + 1;

Loading…
Cancel
Save