Browse Source

manually merge changes to X11-mod/X13-mod kernels (lazybear)

djm34
Jan Berdajs 11 years ago
parent
commit
064d940ea2
  1. 1
      kernel/darkcoin-mod.cl
  2. 126
      kernel/hamsi_helper.cl
  3. 222
      kernel/marucoin-mod.cl

1
kernel/darkcoin-mod.cl

@ -3,6 +3,7 @@ @@ -3,6 +3,7 @@
* ==========================(LICENSE BEGIN)============================
*
* Copyright (c) 2014 phm
* Copyright (c) 2014 Girino Vey
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the

126
kernel/hamsi_helper.cl

@ -9282,8 +9282,8 @@ __constant static const sph_u32 T512_62[4][16] = { @@ -9282,8 +9282,8 @@ __constant static const sph_u32 T512_62[4][16] = {
};
#define INPUT_BIG do { \
unsigned acc = buf[0]; \
const sph_u32 *rp; \
unsigned acc = buf(0); \
__constant const sph_u32 *rp; \
rp = &T512_0[acc >> 6][0]; \
m0 = rp[0]; \
m1 = rp[1]; \
@ -9352,7 +9352,7 @@ __constant static const sph_u32 T512_62[4][16] = { @@ -9352,7 +9352,7 @@ __constant static const sph_u32 T512_62[4][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[1]; \
acc = buf(1); \
rp = &T512_8[acc >> 6][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -9421,7 +9421,7 @@ __constant static const sph_u32 T512_62[4][16] = { @@ -9421,7 +9421,7 @@ __constant static const sph_u32 T512_62[4][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[2]; \
acc = buf(2); \
rp = &T512_16[acc >> 6][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -9490,7 +9490,7 @@ __constant static const sph_u32 T512_62[4][16] = { @@ -9490,7 +9490,7 @@ __constant static const sph_u32 T512_62[4][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[3]; \
acc = buf(3); \
rp = &T512_24[acc >> 6][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -9559,7 +9559,7 @@ __constant static const sph_u32 T512_62[4][16] = { @@ -9559,7 +9559,7 @@ __constant static const sph_u32 T512_62[4][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[4]; \
acc = buf(4); \
rp = &T512_32[acc >> 6][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -9628,7 +9628,7 @@ __constant static const sph_u32 T512_62[4][16] = { @@ -9628,7 +9628,7 @@ __constant static const sph_u32 T512_62[4][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[5]; \
acc = buf(5); \
rp = &T512_40[acc >> 6][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -9697,7 +9697,7 @@ __constant static const sph_u32 T512_62[4][16] = { @@ -9697,7 +9697,7 @@ __constant static const sph_u32 T512_62[4][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[6]; \
acc = buf(6); \
rp = &T512_48[acc >> 6][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -9766,7 +9766,7 @@ __constant static const sph_u32 T512_62[4][16] = { @@ -9766,7 +9766,7 @@ __constant static const sph_u32 T512_62[4][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[7]; \
acc = buf(7); \
rp = &T512_56[acc >> 6][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -10928,8 +10928,8 @@ __constant static const sph_u32 T512_63[2][16] = { @@ -10928,8 +10928,8 @@ __constant static const sph_u32 T512_63[2][16] = {
};
#define INPUT_BIG do { \
unsigned acc = buf[0]; \
const sph_u32 *rp; \
unsigned acc = buf(0); \
__constant const sph_u32 *rp; \
rp = &T512_0[acc >> 5][0]; \
m0 = rp[0]; \
m1 = rp[1]; \
@ -10964,7 +10964,7 @@ __constant static const sph_u32 T512_63[2][16] = { @@ -10964,7 +10964,7 @@ __constant static const sph_u32 T512_63[2][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = (acc << 8) | buf[1]; \
acc = (acc << 8) | buf(1); \
rp = &T512_6[(acc >> 7) & 0x07][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -11016,7 +11016,7 @@ __constant static const sph_u32 T512_63[2][16] = { @@ -11016,7 +11016,7 @@ __constant static const sph_u32 T512_63[2][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = (acc << 8) | buf[2]; \
acc = (acc << 8) | buf(2); \
rp = &T512_15[(acc >> 6) & 0x07][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -11068,7 +11068,7 @@ __constant static const sph_u32 T512_63[2][16] = { @@ -11068,7 +11068,7 @@ __constant static const sph_u32 T512_63[2][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[3]; \
acc = buf(3); \
rp = &T512_24[acc >> 5][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -11103,7 +11103,7 @@ __constant static const sph_u32 T512_63[2][16] = { @@ -11103,7 +11103,7 @@ __constant static const sph_u32 T512_63[2][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = (acc << 8) | buf[4]; \
acc = (acc << 8) | buf(4); \
rp = &T512_30[(acc >> 7) & 0x07][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -11155,7 +11155,7 @@ __constant static const sph_u32 T512_63[2][16] = { @@ -11155,7 +11155,7 @@ __constant static const sph_u32 T512_63[2][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = (acc << 8) | buf[5]; \
acc = (acc << 8) | buf(5); \
rp = &T512_39[(acc >> 6) & 0x07][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -11207,7 +11207,7 @@ __constant static const sph_u32 T512_63[2][16] = { @@ -11207,7 +11207,7 @@ __constant static const sph_u32 T512_63[2][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[6]; \
acc = buf(6); \
rp = &T512_48[acc >> 5][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -11242,7 +11242,7 @@ __constant static const sph_u32 T512_63[2][16] = { @@ -11242,7 +11242,7 @@ __constant static const sph_u32 T512_63[2][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = (acc << 8) | buf[7]; \
acc = (acc << 8) | buf(7); \
rp = &T512_54[(acc >> 7) & 0x07][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -12902,8 +12902,8 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -12902,8 +12902,8 @@ __constant static const sph_u32 T512_60[16][16] = {
};
#define INPUT_BIG do { \
unsigned acc = buf[0]; \
const sph_u32 *rp; \
unsigned acc = buf(0); \
__constant const sph_u32 *rp; \
rp = &T512_0[acc >> 4][0]; \
m0 = rp[0]; \
m1 = rp[1]; \
@ -12938,7 +12938,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -12938,7 +12938,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[1]; \
acc = buf(1); \
rp = &T512_8[acc >> 4][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -12973,7 +12973,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -12973,7 +12973,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[2]; \
acc = buf(2); \
rp = &T512_16[acc >> 4][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -13008,7 +13008,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -13008,7 +13008,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[3]; \
acc = buf(3); \
rp = &T512_24[acc >> 4][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -13043,7 +13043,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -13043,7 +13043,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[4]; \
acc = buf(4); \
rp = &T512_32[acc >> 4][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -13078,7 +13078,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -13078,7 +13078,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[5]; \
acc = buf(5); \
rp = &T512_40[acc >> 4][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -13113,7 +13113,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -13113,7 +13113,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[6]; \
acc = buf(6); \
rp = &T512_48[acc >> 4][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -13148,7 +13148,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -13148,7 +13148,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[7]; \
acc = buf(7); \
rp = &T512_56[acc >> 4][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -15629,8 +15629,8 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -15629,8 +15629,8 @@ __constant static const sph_u32 T512_60[16][16] = {
};
#define INPUT_BIG do { \
unsigned acc = buf[0]; \
const sph_u32 *rp; \
unsigned acc = buf(0); \
__constant const sph_u32 *rp; \
rp = &T512_0[acc >> 3][0]; \
m0 = rp[0]; \
m1 = rp[1]; \
@ -15648,7 +15648,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -15648,7 +15648,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD = rp[13]; \
mE = rp[14]; \
mF = rp[15]; \
acc = (acc << 8) | buf[1]; \
acc = (acc << 8) | buf(1); \
rp = &T512_5[(acc >> 6) & 0x1f][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -15683,7 +15683,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -15683,7 +15683,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = (acc << 8) | buf[2]; \
acc = (acc << 8) | buf(2); \
rp = &T512_15[(acc >> 4) & 0x1f][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -15701,7 +15701,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -15701,7 +15701,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = (acc << 8) | buf[3]; \
acc = (acc << 8) | buf(3); \
rp = &T512_20[(acc >> 7) & 0x1f][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -15736,7 +15736,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -15736,7 +15736,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = (acc << 8) | buf[4]; \
acc = (acc << 8) | buf(4); \
rp = &T512_30[(acc >> 5) & 0x1f][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -15771,7 +15771,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -15771,7 +15771,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[5]; \
acc = buf(5); \
rp = &T512_40[acc >> 3][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -15789,7 +15789,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -15789,7 +15789,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = (acc << 8) | buf[6]; \
acc = (acc << 8) | buf(6); \
rp = &T512_45[(acc >> 6) & 0x1f][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -15824,7 +15824,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -15824,7 +15824,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = (acc << 8) | buf[7]; \
acc = (acc << 8) | buf(7); \
rp = &T512_55[(acc >> 4) & 0x1f][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -19835,8 +19835,8 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -19835,8 +19835,8 @@ __constant static const sph_u32 T512_60[16][16] = {
};
#define INPUT_BIG do { \
unsigned acc = buf[0]; \
const sph_u32 *rp; \
unsigned acc = buf(0); \
__constant const sph_u32 *rp; \
rp = &T512_0[acc >> 2][0]; \
m0 = rp[0]; \
m1 = rp[1]; \
@ -19854,7 +19854,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -19854,7 +19854,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD = rp[13]; \
mE = rp[14]; \
mF = rp[15]; \
acc = (acc << 8) | buf[1]; \
acc = (acc << 8) | buf(1); \
rp = &T512_6[(acc >> 4) & 0x3f][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -19872,7 +19872,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -19872,7 +19872,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = (acc << 8) | buf[2]; \
acc = (acc << 8) | buf(2); \
rp = &T512_12[(acc >> 6) & 0x3f][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -19907,7 +19907,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -19907,7 +19907,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[3]; \
acc = buf(3); \
rp = &T512_24[acc >> 2][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -19925,7 +19925,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -19925,7 +19925,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = (acc << 8) | buf[4]; \
acc = (acc << 8) | buf(4); \
rp = &T512_30[(acc >> 4) & 0x3f][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -19943,7 +19943,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -19943,7 +19943,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = (acc << 8) | buf[5]; \
acc = (acc << 8) | buf(5); \
rp = &T512_36[(acc >> 6) & 0x3f][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -19978,7 +19978,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -19978,7 +19978,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[6]; \
acc = buf(6); \
rp = &T512_48[acc >> 2][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -19996,7 +19996,7 @@ __constant static const sph_u32 T512_60[16][16] = { @@ -19996,7 +19996,7 @@ __constant static const sph_u32 T512_60[16][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = (acc << 8) | buf[7]; \
acc = (acc << 8) | buf(7); \
rp = &T512_54[(acc >> 4) & 0x3f][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -26992,8 +26992,8 @@ __constant static const sph_u32 T512_63[2][16] = { @@ -26992,8 +26992,8 @@ __constant static const sph_u32 T512_63[2][16] = {
};
#define INPUT_BIG do { \
unsigned acc = buf[0]; \
const sph_u32 *rp; \
unsigned acc = buf(0); \
__constant const sph_u32 *rp; \
rp = &T512_0[acc >> 1][0]; \
m0 = rp[0]; \
m1 = rp[1]; \
@ -27011,7 +27011,7 @@ __constant static const sph_u32 T512_63[2][16] = { @@ -27011,7 +27011,7 @@ __constant static const sph_u32 T512_63[2][16] = {
mD = rp[13]; \
mE = rp[14]; \
mF = rp[15]; \
acc = (acc << 8) | buf[1]; \
acc = (acc << 8) | buf(1); \
rp = &T512_7[(acc >> 2) & 0x7f][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -27029,7 +27029,7 @@ __constant static const sph_u32 T512_63[2][16] = { @@ -27029,7 +27029,7 @@ __constant static const sph_u32 T512_63[2][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = (acc << 8) | buf[2]; \
acc = (acc << 8) | buf(2); \
rp = &T512_14[(acc >> 3) & 0x7f][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -27047,7 +27047,7 @@ __constant static const sph_u32 T512_63[2][16] = { @@ -27047,7 +27047,7 @@ __constant static const sph_u32 T512_63[2][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = (acc << 8) | buf[3]; \
acc = (acc << 8) | buf(3); \
rp = &T512_21[(acc >> 4) & 0x7f][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -27065,7 +27065,7 @@ __constant static const sph_u32 T512_63[2][16] = { @@ -27065,7 +27065,7 @@ __constant static const sph_u32 T512_63[2][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = (acc << 8) | buf[4]; \
acc = (acc << 8) | buf(4); \
rp = &T512_28[(acc >> 5) & 0x7f][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -27083,7 +27083,7 @@ __constant static const sph_u32 T512_63[2][16] = { @@ -27083,7 +27083,7 @@ __constant static const sph_u32 T512_63[2][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = (acc << 8) | buf[5]; \
acc = (acc << 8) | buf(5); \
rp = &T512_35[(acc >> 6) & 0x7f][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -27101,7 +27101,7 @@ __constant static const sph_u32 T512_63[2][16] = { @@ -27101,7 +27101,7 @@ __constant static const sph_u32 T512_63[2][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = (acc << 8) | buf[6]; \
acc = (acc << 8) | buf(6); \
rp = &T512_42[(acc >> 7) & 0x7f][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -27136,7 +27136,7 @@ __constant static const sph_u32 T512_63[2][16] = { @@ -27136,7 +27136,7 @@ __constant static const sph_u32 T512_63[2][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[7]; \
acc = buf(7); \
rp = &T512_56[acc >> 1][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -39490,8 +39490,8 @@ __constant static const sph_u32 T512_56[256][16] = { @@ -39490,8 +39490,8 @@ __constant static const sph_u32 T512_56[256][16] = {
};
#define INPUT_BIG do { \
unsigned acc = buf[0]; \
const sph_u32 *rp; \
unsigned acc = buf(0); \
__constant const sph_u32 *rp; \
rp = &T512_0[acc][0]; \
m0 = rp[0]; \
m1 = rp[1]; \
@ -39509,7 +39509,7 @@ __constant static const sph_u32 T512_56[256][16] = { @@ -39509,7 +39509,7 @@ __constant static const sph_u32 T512_56[256][16] = {
mD = rp[13]; \
mE = rp[14]; \
mF = rp[15]; \
acc = buf[1]; \
acc = buf(1); \
rp = &T512_8[acc][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -39527,7 +39527,7 @@ __constant static const sph_u32 T512_56[256][16] = { @@ -39527,7 +39527,7 @@ __constant static const sph_u32 T512_56[256][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[2]; \
acc = buf(2); \
rp = &T512_16[acc][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -39545,7 +39545,7 @@ __constant static const sph_u32 T512_56[256][16] = { @@ -39545,7 +39545,7 @@ __constant static const sph_u32 T512_56[256][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[3]; \
acc = buf(3); \
rp = &T512_24[acc][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -39563,7 +39563,7 @@ __constant static const sph_u32 T512_56[256][16] = { @@ -39563,7 +39563,7 @@ __constant static const sph_u32 T512_56[256][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[4]; \
acc = buf(4); \
rp = &T512_32[acc][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -39581,7 +39581,7 @@ __constant static const sph_u32 T512_56[256][16] = { @@ -39581,7 +39581,7 @@ __constant static const sph_u32 T512_56[256][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[5]; \
acc = buf(5); \
rp = &T512_40[acc][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -39599,7 +39599,7 @@ __constant static const sph_u32 T512_56[256][16] = { @@ -39599,7 +39599,7 @@ __constant static const sph_u32 T512_56[256][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[6]; \
acc = buf(6); \
rp = &T512_48[acc][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \
@ -39617,7 +39617,7 @@ __constant static const sph_u32 T512_56[256][16] = { @@ -39617,7 +39617,7 @@ __constant static const sph_u32 T512_56[256][16] = {
mD ^= rp[13]; \
mE ^= rp[14]; \
mF ^= rp[15]; \
acc = buf[7]; \
acc = buf(7); \
rp = &T512_56[acc][0]; \
m0 ^= rp[0]; \
m1 ^= rp[1]; \

222
kernel/marucoin-mod.cl

@ -75,7 +75,7 @@ typedef long sph_s64; @@ -75,7 +75,7 @@ typedef long sph_s64;
#define SPH_GROESTL_BIG_ENDIAN 0
#define SPH_CUBEHASH_UNROLL 0
#define SPH_KECCAK_UNROLL 0
#define SPH_HAMSI_EXPAND_BIG 1
#define SPH_HAMSI_EXPAND_BIG 4
#include "blake.cl"
#include "bmw.cl"
@ -771,6 +771,8 @@ __kernel void search9(__global hash_t* hashes) @@ -771,6 +771,8 @@ __kernel void search9(__global hash_t* hashes)
barrier(CLK_GLOBAL_MEM_FENCE);
}
#ifndef X13MODOLD
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search10(__global hash_t* hashes)
{
@ -906,7 +908,7 @@ __kernel void search12(__global hash_t* hashes, __global uint* output, const ulo @@ -906,7 +908,7 @@ __kernel void search12(__global hash_t* hashes, __global uint* output, const ulo
{
uint gid = get_global_id(0);
uint offset = get_global_offset(0);
__local hash_t hash;
hash_t hash;
__global hash_t *hashp = &(hashes[gid-offset]);
for (int i = 0; i < 8; i++) {
@ -992,9 +994,223 @@ __kernel void search12(__global hash_t* hashes, __global uint* output, const ulo @@ -992,9 +994,223 @@ __kernel void search12(__global hash_t* hashes, __global uint* output, const ulo
bool result = (hash.h8[3] <= target);
if (result)
output[output[0xFF]++] = SWAP4(gid);
output[atomic_inc(output+0xFF)] = SWAP4(gid);
barrier(CLK_GLOBAL_MEM_FENCE);
}
#else
__attribute__((reqd_work_group_size(WORKSIZE, 1, 1)))
__kernel void search10(__global hash_t* hashes, __global uint* output, const ulong target)
{
uint gid = get_global_id(0);
uint offset = get_global_offset(0);
hash_t hash;
__local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256];
int init = get_local_id(0);
int step = get_local_size(0);
for (int i = init; i < 256; i += step)
{
AES0[i] = AES0_C[i];
AES1[i] = AES1_C[i];
AES2[i] = AES2_C[i];
AES3[i] = AES3_C[i];
}
barrier(CLK_LOCAL_MEM_FENCE);
for (int i = 0; i < 8; i++) {
hash.h8[i] = hashes[gid-offset].h8[i];
}
// echo
{
sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1;
sph_u64 Vb00, Vb01, Vb10, Vb11, Vb20, Vb21, Vb30, Vb31, Vb40, Vb41, Vb50, Vb51, Vb60, Vb61, Vb70, Vb71;
Vb00 = Vb10 = Vb20 = Vb30 = Vb40 = Vb50 = Vb60 = Vb70 = 512UL;
Vb01 = Vb11 = Vb21 = Vb31 = Vb41 = Vb51 = Vb61 = Vb71 = 0;
sph_u32 K0 = 512;
sph_u32 K1 = 0;
sph_u32 K2 = 0;
sph_u32 K3 = 0;
W00 = Vb00;
W01 = Vb01;
W10 = Vb10;
W11 = Vb11;
W20 = Vb20;
W21 = Vb21;
W30 = Vb30;
W31 = Vb31;
W40 = Vb40;
W41 = Vb41;
W50 = Vb50;
W51 = Vb51;
W60 = Vb60;
W61 = Vb61;
W70 = Vb70;
W71 = Vb71;
W80 = hash.h8[0];
W81 = hash.h8[1];
W90 = hash.h8[2];
W91 = hash.h8[3];
WA0 = hash.h8[4];
WA1 = hash.h8[5];
WB0 = hash.h8[6];
WB1 = hash.h8[7];
WC0 = 0x80;
WC1 = 0;
WD0 = 0;
WD1 = 0;
WE0 = 0;
WE1 = 0x200000000000000;
WF0 = 0x200;
WF1 = 0;
for (unsigned u = 0; u < 10; u ++) {
BIG_ROUND;
}
hash.h8[0] ^= Vb00 ^ W00 ^ W80;
hash.h8[1] ^= Vb01 ^ W01 ^ W81;
hash.h8[2] ^= Vb10 ^ W10 ^ W90;
hash.h8[3] ^= Vb11 ^ W11 ^ W91;
hash.h8[4] ^= Vb20 ^ W20 ^ WA0;
hash.h8[5] ^= Vb21 ^ W21 ^ WA1;
hash.h8[6] ^= Vb30 ^ W30 ^ WB0;
hash.h8[7] ^= Vb31 ^ W31 ^ WB1;
}
// hamsi
{
sph_u32 c0 = HAMSI_IV512[0], c1 = HAMSI_IV512[1], c2 = HAMSI_IV512[2], c3 = HAMSI_IV512[3];
sph_u32 c4 = HAMSI_IV512[4], c5 = HAMSI_IV512[5], c6 = HAMSI_IV512[6], c7 = HAMSI_IV512[7];
sph_u32 c8 = HAMSI_IV512[8], c9 = HAMSI_IV512[9], cA = HAMSI_IV512[10], cB = HAMSI_IV512[11];
sph_u32 cC = HAMSI_IV512[12], cD = HAMSI_IV512[13], cE = HAMSI_IV512[14], cF = HAMSI_IV512[15];
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
sph_u32 h[16] = { c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, cA, cB, cC, cD, cE, cF };
#define buf(u) hash.h1[i + u]
for(int i = 0; i < 64; i += 8) {
INPUT_BIG;
P_BIG;
T_BIG;
}
#undef buf
#define buf(u) (u == 0 ? 0x80 : 0)
INPUT_BIG;
P_BIG;
T_BIG;
#undef buf
#define buf(u) (u == 6 ? 2 : 0)
INPUT_BIG;
PF_BIG;
T_BIG;
for (unsigned u = 0; u < 16; u ++)
hash.h4[u] = h[u];
}
// fugue
{
sph_u32 S00, S01, S02, S03, S04, S05, S06, S07, S08, S09;
sph_u32 S10, S11, S12, S13, S14, S15, S16, S17, S18, S19;
sph_u32 S20, S21, S22, S23, S24, S25, S26, S27, S28, S29;
sph_u32 S30, S31, S32, S33, S34, S35;
ulong fc_bit_count = (sph_u64) 64 << 3;
S00 = S01 = S02 = S03 = S04 = S05 = S06 = S07 = S08 = S09 = S10 = S11 = S12 = S13 = S14 = S15 = S16 = S17 = S18 = S19 = 0;
S20 = SPH_C32(0x8807a57e); S21 = SPH_C32(0xe616af75); S22 = SPH_C32(0xc5d3e4db); S23 = SPH_C32(0xac9ab027);
S24 = SPH_C32(0xd915f117); S25 = SPH_C32(0xb6eecc54); S26 = SPH_C32(0x06e8020b); S27 = SPH_C32(0x4a92efd1);
S28 = SPH_C32(0xaac6e2c9); S29 = SPH_C32(0xddb21398); S30 = SPH_C32(0xcae65838); S31 = SPH_C32(0x437f203f);
S32 = SPH_C32(0x25ea78e7); S33 = SPH_C32(0x951fddd6); S34 = SPH_C32(0xda6ed11d); S35 = SPH_C32(0xe13e3567);
FUGUE512_3((hash.h4[0x0]), (hash.h4[0x1]), (hash.h4[0x2]));
FUGUE512_3((hash.h4[0x3]), (hash.h4[0x4]), (hash.h4[0x5]));
FUGUE512_3((hash.h4[0x6]), (hash.h4[0x7]), (hash.h4[0x8]));
FUGUE512_3((hash.h4[0x9]), (hash.h4[0xA]), (hash.h4[0xB]));
FUGUE512_3((hash.h4[0xC]), (hash.h4[0xD]), (hash.h4[0xE]));
FUGUE512_3((hash.h4[0xF]), as_uint2(fc_bit_count).y, as_uint2(fc_bit_count).x);
// apply round shift if necessary
int i;
for (i = 0; i < 32; i ++) {
ROR3;
CMIX36(S00, S01, S02, S04, S05, S06, S18, S19, S20);
SMIX(S00, S01, S02, S03);
}
for (i = 0; i < 13; i ++) {
S04 ^= S00;
S09 ^= S00;
S18 ^= S00;
S27 ^= S00;
ROR9;
SMIX(S00, S01, S02, S03);
S04 ^= S00;
S10 ^= S00;
S18 ^= S00;
S27 ^= S00;
ROR9;
SMIX(S00, S01, S02, S03);
S04 ^= S00;
S10 ^= S00;
S19 ^= S00;
S27 ^= S00;
ROR9;
SMIX(S00, S01, S02, S03);
S04 ^= S00;
S10 ^= S00;
S19 ^= S00;
S28 ^= S00;
ROR8;
SMIX(S00, S01, S02, S03);
}
S04 ^= S00;
S09 ^= S00;
S18 ^= S00;
S27 ^= S00;
hash.h4[0] = SWAP4(S01);
hash.h4[1] = SWAP4(S02);
hash.h4[2] = SWAP4(S03);
hash.h4[3] = SWAP4(S04);
hash.h4[4] = SWAP4(S09);
hash.h4[5] = SWAP4(S10);
hash.h4[6] = SWAP4(S11);
hash.h4[7] = SWAP4(S12);
hash.h4[8] = SWAP4(S18);
hash.h4[9] = SWAP4(S19);
hash.h4[10] = SWAP4(S20);
hash.h4[11] = SWAP4(S21);
hash.h4[12] = SWAP4(S27);
hash.h4[13] = SWAP4(S28);
hash.h4[14] = SWAP4(S29);
hash.h4[15] = SWAP4(S30);
}
bool result = (hash.h8[3] <= target);
if (result)
output[atomic_inc(output+0xFF)] = SWAP4(gid);
barrier(CLK_GLOBAL_MEM_FENCE);
}
#endif // X13MODOLD
#endif // X13MOD_CL

Loading…
Cancel
Save