%%cl_kernel -o "-I . -cl-strict-aliasing"
//Type names definition.
#define uint8_t unsigned char
#define uint16_t unsigned short
#define uint32_t unsigned int
#define uint64_t unsigned long //Tip: unsigned long long int failed on compile (AMD).
#define _OPENCL_COMPILER
//Macros.
#pragma OPENCL EXTENSION cl_amd_media_ops : enable
#define Ch(x,y,z) bitselect(z, y, x)
#define Maj(x,y,z) bitselect(x, y, z ^ x)
#define ror(x, n) rotate(x, (uint64_t) 64-n)
#define Sigma0(x) ((ror(x,28)) ^ (ror(x,34)) ^ (ror(x,39)))
#define Sigma1(x) ((ror(x,14)) ^ (ror(x,18)) ^ (ror(x,41)))
#define sigma0(x) ((ror(x,1)) ^ (ror(x,8)) ^ (x>>7))
#define sigma1(x) ((ror(x,19)) ^ (ror(x,61)) ^ (x>>6))
#include "opencl_realman2.h"
/*
__constant uint64_t k[] = {
0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL,
0x3956c25bf348b538UL, 0x59f111f1b605d019UL, 0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL,
0xd807aa98a3030242UL, 0x12835b0145706fbeUL, 0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL,
0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL, 0x9bdc06a725c71235UL, 0xc19bf174cf692694UL,
0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL, 0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL,
0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL, 0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL,
0x983e5152ee66dfabUL, 0xa831c66d2db43210UL, 0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL,
0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL, 0x06ca6351e003826fUL, 0x142929670a0e6e70UL,
0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL, 0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL,
0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL, 0x81c2c92e47edaee6UL, 0x92722c851482353bUL,
0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL, 0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL,
0xd192e819d6ef5218UL, 0xd69906245565a910UL, 0xf40e35855771202aUL, 0x106aa07032bbd1b8UL,
0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL, 0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL,
0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL, 0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL,
0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL, 0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL,
0x90befffa23631e28UL, 0xa4506cebde82bde9UL, 0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL,
0xca273eceea26619cUL, 0xd186b8c721c0c207UL, 0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL,
0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL, 0x113f9804bef90daeUL, 0x1b710b35131c471bUL,
0x28db77f523047d84UL, 0x32caab7b40c72493UL, 0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL,
0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL, 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL,
};
*/
// Round constants k0..k79, one per compression round. They are individual
// macros (instead of a __constant array) so that the round macros can select
// them by token pasting (k##i). All carry the same UL suffix.
#define k0 0x428a2f98d728ae22UL
#define k1 0x7137449123ef65cdUL
#define k2 0xb5c0fbcfec4d3b2fUL
#define k3 0xe9b5dba58189dbbcUL
#define k4 0x3956c25bf348b538UL
#define k5 0x59f111f1b605d019UL
#define k6 0x923f82a4af194f9bUL
#define k7 0xab1c5ed5da6d8118UL
#define k8 0xd807aa98a3030242UL
#define k9 0x12835b0145706fbeUL
#define k10 0x243185be4ee4b28cUL
#define k11 0x550c7dc3d5ffb4e2UL
#define k12 0x72be5d74f27b896fUL
#define k13 0x80deb1fe3b1696b1UL
#define k14 0x9bdc06a725c71235UL
#define k15 0xc19bf174cf692694UL
#define k16 0xe49b69c19ef14ad2UL
#define k17 0xefbe4786384f25e3UL
#define k18 0x0fc19dc68b8cd5b5UL
#define k19 0x240ca1cc77ac9c65UL
#define k20 0x2de92c6f592b0275UL
#define k21 0x4a7484aa6ea6e483UL
#define k22 0x5cb0a9dcbd41fbd4UL
#define k23 0x76f988da831153b5UL
#define k24 0x983e5152ee66dfabUL
#define k25 0xa831c66d2db43210UL
#define k26 0xb00327c898fb213fUL
#define k27 0xbf597fc7beef0ee4UL
#define k28 0xc6e00bf33da88fc2UL
#define k29 0xd5a79147930aa725UL
#define k30 0x06ca6351e003826fUL
#define k31 0x142929670a0e6e70UL
#define k32 0x27b70a8546d22ffcUL
#define k33 0x2e1b21385c26c926UL
#define k34 0x4d2c6dfc5ac42aedUL
#define k35 0x53380d139d95b3dfUL
#define k36 0x650a73548baf63deUL
#define k37 0x766a0abb3c77b2a8UL
#define k38 0x81c2c92e47edaee6UL
#define k39 0x92722c851482353bUL
#define k40 0xa2bfe8a14cf10364UL
#define k41 0xa81a664bbc423001UL
#define k42 0xc24b8b70d0f89791UL
#define k43 0xc76c51a30654be30UL
#define k44 0xd192e819d6ef5218UL
#define k45 0xd69906245565a910UL
#define k46 0xf40e35855771202aUL
#define k47 0x106aa07032bbd1b8UL
#define k48 0x19a4c116b8d2d0c8UL
#define k49 0x1e376c085141ab53UL
#define k50 0x2748774cdf8eeb99UL
#define k51 0x34b0bcb5e19b48a8UL
#define k52 0x391c0cb3c5c95a63UL
#define k53 0x4ed8aa4ae3418acbUL
#define k54 0x5b9cca4f7763e373UL
#define k55 0x682e6ff3d6b2b8a3UL
#define k56 0x748f82ee5defb2fcUL
#define k57 0x78a5636f43172f60UL
#define k58 0x84c87814a1f0ab72UL
#define k59 0x8cc702081a6439ecUL
#define k60 0x90befffa23631e28UL
#define k61 0xa4506cebde82bde9UL
#define k62 0xbef9a3f7b2c67915UL
#define k63 0xc67178f2e372532bUL
#define k64 0xca273eceea26619cUL
#define k65 0xd186b8c721c0c207UL
#define k66 0xeada7dd6cde0eb1eUL
#define k67 0xf57d4f7fee6ed178UL
#define k68 0x06f067aa72176fbaUL
#define k69 0x0a637dc5a2c898a6UL
#define k70 0x113f9804bef90daeUL
#define k71 0x1b710b35131c471bUL
#define k72 0x28db77f523047d84UL
#define k73 0x32caab7b40c72493UL
#define k74 0x3c9ebe0a15c9bebcUL
#define k75 0x431d67c49c100d4cUL
#define k76 0x4cc5d4becb3e42b6UL
#define k77 0x597f299cfc657e2aUL
#define k78 0x5fcb6fab3ad6faecUL
#define k79 0x6c44198c4a475817UL
// Build-time switch: when defined, sha384_block builds the candidate word
// w[A8] through charset[] lookups (its #ifdef ALPHADIGITS branch).
#define ALPHADIGITS
// Initial values of the eight 64-bit state words; sha384_block loads them
// into v.s0..v.s7 and adds H0 back into the final output word.
// The values match the SHA-384 initial hash value listed in FIPS 180-4.
#define H0 0xcbbb9d5dc1059ed8UL
#define H1 0x629a292a367cd507UL
#define H2 0x9159015a3070dd17UL
#define H3 0x152fecd8f70e5939UL
#define H4 0x67332667ffc00b31UL
#define H5 0x8eb44a8768581511UL
#define H6 0xdb0c2e0d64f98fa7UL
#define H7 0x47b5481dbefa4fa4UL
/*
 * Runs a fully unrolled 80-round SHA-384 compression over one 16-word block
 * and returns the result of CONDITION.
 *
 * The block is SRC[0..15] with word A8 replaced by a value derived from x.
 * SRC, A8, charset, B and CONDITION are not defined in the visible source;
 * presumably they come from opencl_realman2.h (TODO confirm).
 *
 * x: candidate index; only the bit fields used to build w[A8] are read.
 *
 * Only the first output word is completed: the last round (Rlast) writes
 * v.s0 with H0 already added and skips every other state update.
 * CONDITION presumably tests that word (TODO confirm).
 *
 * Note: the helper macros defined inside the body (t0, t1, T1, T2, R0, R,
 * Rlast) are never #undef'd, so they stay visible after this function.
 */
inline bool sha384_block(__const uint64_t x) {
// Working state a..h kept in lanes s0..s7, seeded with the initial values.
__private ulong8 v = {H0, H1, H2, H3, H4,H5,H6,H7};
// 16-word message schedule; the R macro overwrites it in place.
__private uint64_t w[16]; //={SRC0, SRC1, SRC2, SRC3, SRC4, SRC5, SRC6, SRC7, SRC8, SRC9, SRCa, SRCb,SRCc,SRCd, SRCe, SRCf};
// __private uint64_t t0, t1;
// The two per-round temporaries live in the lanes of one ulong2.
__private ulong2 _t;
#define t0 _t.s0
#define t1 _t.s1
/*
Disabled earlier variant of the w[A8] construction, kept for reference:
#ifdef ALPHADIGITS
t.s1=x;
#define SET_W_A8 \
w[A8] = (charset[t.s1&63]<<56)^(charset[(t.s1>>6)&63]<<48)^(charset[(t.s1>>12)&63]<<40) \
^(charset[(t.s1>>18)&63]<<32)^(charset[(t.s1>>24)&63]<<24) \
^(charset[(t.s1>>30)&63]<<16)^(charset[(t.s1>>36)&63]<<8)^B;
SET_W_A8
#else
w[A8]= (((x<<14)^x)&0x3f3f3f3f3f3f3f3f)+0x3f3f3f3f3f3f3f3f;
#endif
*/
// Words below index A8 are copied unchanged from SRC.
#pragma unroll
for (int i = 0; i < A8; i++) w[i] = SRC[i];
// Word A8 carries the candidate. With ALPHADIGITS (defined earlier in this
// file) seven 6-bit fields of x (bits 0..41) each index charset[]; the
// results are placed at bit offsets 56, 48, ..., 8 and B is XORed in.
// NOTE(review): the shifts by 32..56 only make sense if charset elements
// are a 64-bit type - confirm against the charset declaration.
#ifdef ALPHADIGITS
w[A8] = (charset[x&63]<<56)^(charset[(x>>6)&63]<<48)^(charset[(x>>12)&63]<<40) \
^(charset[(x>>18)&63]<<32)^(charset[(x>>24)&63]<<24) \
^(charset[(x>>30)&63]<<16)^(charset[(x>>36)&63]<<8)^B;
#else
w[A8]= (((x<<14)^x)&0x3f3f3f3f3f3f3f3f)+0x3f3f3f3f3f3f3f3f;
#endif
// Words above index A8 are copied unchanged from SRC.
#pragma unroll
for (int i = A8+1; i < 16; i++) w[i] = SRC[i];
/*
 * Round macro arguments:
 *   a..h - lane numbers of v currently holding the state words a..h
 *   i    - round number, pasted onto "k" to select the round constant
 *   j1..j4 - hex digits indexing w[] for rounds i (mod 16), i-2, i-15, i-7
 */
// T1 = K[i] + W[i] + h + Sigma1(e) + Ch(e,f,g); expands to a bare sum.
#define T1(a,b,c,d,e,f,g,h,i,j1) k##i + w[0x##j1] + v.s##h + Sigma1(v.s##e) + Ch(v.s##e, v.s##f, v.s##g)
// T2 = Maj(a,b,c) + Sigma0(a); expands to a bare sum.
#define T2(a,b,c) Maj(v.s##a, v.s##b, v.s##c) + Sigma0(v.s##a)
// One round without schedule update (rounds 0..15; j2..j4 are unused).
// The new "e" (d + T1) is stored in lane h and the new "a" (T1 + T2) in
// lane d, so nothing is shuffled; the next call permutes its lane arguments
// instead ((0,1,2,3) -> (3,0,1,2) and (4,5,6,7) -> (7,4,5,6)).
#define R0(a,b,c,d,e,f,g,h, i, j1, j2, j3, j4) \
t0 = T1(a,b,c,d,e,f,g,h, i, j1); \
t1 = T2(a,b,c);\
v.s##h= v.s##d + t0; v.s##d = t0 + t1;
// One round with schedule update (rounds 16..78): w[j1] is replaced by
// sigma1(W[i-2]) + sigma0(W[i-15]) + W[i-16] + W[i-7], then R0 runs.
#define R(a,b,c,d,e,f,g,h, i, j1, j2, j3, j4)\
w[0x##j1] = sigma1(w[0x##j2]) + sigma0(w[0x##j3]) + w[0x##j1] + w[0x##j4]; \
R0(a,b,c,d,e,f,g,h, i, j1, j2, j3, j4)
// Final round: computes only the new "a" (T1 + T2, with the schedule word
// folded in) plus H0 and stores it in lane d; w[] and lane h are not updated.
#define Rlast(a,b,c,d,e,f,g,h, i, j1, j2, j3, j4)\
v.s##d = k##i + sigma1(w[0x##j2]) + sigma0(w[0x##j3]) + w[0x##j1] + w[0x##j4] + v.s##h + Sigma1(v.s##e) + Ch(v.s##e, v.s##f, v.s##g) + T2(a,b,c) +H0;
// Rounds 0..15: message words are used as loaded.
R0(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, e, 1, 9);
R0(3, 0, 1, 2, 7, 4, 5, 6, 1, 1, f, 2, a);
R0(2, 3, 0, 1, 6, 7, 4, 5, 2, 2, 0, 3, b);
R0(1, 2, 3, 0, 5, 6, 7, 4, 3, 3, 1, 4, c);
R0(0, 1, 2, 3, 4, 5, 6, 7, 4, 4, 2, 5, d);
R0(3, 0, 1, 2, 7, 4, 5, 6, 5, 5, 3, 6, e);
R0(2, 3, 0, 1, 6, 7, 4, 5, 6, 6, 4, 7, f);
R0(1, 2, 3, 0, 5, 6, 7, 4, 7, 7, 5, 8, 0);
R0(0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 6, 9, 1);
R0(3, 0, 1, 2, 7, 4, 5, 6, 9, 9, 7, a, 2);
R0(2, 3, 0, 1, 6, 7, 4, 5, 10, a, 8, b, 3);
R0(1, 2, 3, 0, 5, 6, 7, 4, 11, b, 9, c, 4);
R0(0, 1, 2, 3, 4, 5, 6, 7, 12, c, a, d, 5);
R0(3, 0, 1, 2, 7, 4, 5, 6, 13, d, b, e, 6);
R0(2, 3, 0, 1, 6, 7, 4, 5, 14, e, c, f, 7);
R0(1, 2, 3, 0, 5, 6, 7, 4, 15, f, d, 0, 8);
// Rounds 16..78: each round first extends the schedule in place.
R(0, 1, 2, 3, 4, 5, 6, 7, 16, 0, e, 1, 9);
R(3, 0, 1, 2, 7, 4, 5, 6, 17, 1, f, 2, a);
R(2, 3, 0, 1, 6, 7, 4, 5, 18, 2, 0, 3, b);
R(1, 2, 3, 0, 5, 6, 7, 4, 19, 3, 1, 4, c);
R(0, 1, 2, 3, 4, 5, 6, 7, 20, 4, 2, 5, d);
R(3, 0, 1, 2, 7, 4, 5, 6, 21, 5, 3, 6, e);
R(2, 3, 0, 1, 6, 7, 4, 5, 22, 6, 4, 7, f);
R(1, 2, 3, 0, 5, 6, 7, 4, 23, 7, 5, 8, 0);
R(0, 1, 2, 3, 4, 5, 6, 7, 24, 8, 6, 9, 1);
R(3, 0, 1, 2, 7, 4, 5, 6, 25, 9, 7, a, 2);
R(2, 3, 0, 1, 6, 7, 4, 5, 26, a, 8, b, 3);
R(1, 2, 3, 0, 5, 6, 7, 4, 27, b, 9, c, 4);
R(0, 1, 2, 3, 4, 5, 6, 7, 28, c, a, d, 5);
R(3, 0, 1, 2, 7, 4, 5, 6, 29, d, b, e, 6);
R(2, 3, 0, 1, 6, 7, 4, 5, 30, e, c, f, 7);
R(1, 2, 3, 0, 5, 6, 7, 4, 31, f, d, 0, 8);
R(0, 1, 2, 3, 4, 5, 6, 7, 32, 0, e, 1, 9);
R(3, 0, 1, 2, 7, 4, 5, 6, 33, 1, f, 2, a);
R(2, 3, 0, 1, 6, 7, 4, 5, 34, 2, 0, 3, b);
R(1, 2, 3, 0, 5, 6, 7, 4, 35, 3, 1, 4, c);
R(0, 1, 2, 3, 4, 5, 6, 7, 36, 4, 2, 5, d);
R(3, 0, 1, 2, 7, 4, 5, 6, 37, 5, 3, 6, e);
R(2, 3, 0, 1, 6, 7, 4, 5, 38, 6, 4, 7, f);
R(1, 2, 3, 0, 5, 6, 7, 4, 39, 7, 5, 8, 0);
R(0, 1, 2, 3, 4, 5, 6, 7, 40, 8, 6, 9, 1);
R(3, 0, 1, 2, 7, 4, 5, 6, 41, 9, 7, a, 2);
R(2, 3, 0, 1, 6, 7, 4, 5, 42, a, 8, b, 3);
R(1, 2, 3, 0, 5, 6, 7, 4, 43, b, 9, c, 4);
R(0, 1, 2, 3, 4, 5, 6, 7, 44, c, a, d, 5);
R(3, 0, 1, 2, 7, 4, 5, 6, 45, d, b, e, 6);
R(2, 3, 0, 1, 6, 7, 4, 5, 46, e, c, f, 7);
R(1, 2, 3, 0, 5, 6, 7, 4, 47, f, d, 0, 8);
R(0, 1, 2, 3, 4, 5, 6, 7, 48, 0, e, 1, 9);
R(3, 0, 1, 2, 7, 4, 5, 6, 49, 1, f, 2, a);
R(2, 3, 0, 1, 6, 7, 4, 5, 50, 2, 0, 3, b);
R(1, 2, 3, 0, 5, 6, 7, 4, 51, 3, 1, 4, c);
R(0, 1, 2, 3, 4, 5, 6, 7, 52, 4, 2, 5, d);
R(3, 0, 1, 2, 7, 4, 5, 6, 53, 5, 3, 6, e);
R(2, 3, 0, 1, 6, 7, 4, 5, 54, 6, 4, 7, f);
R(1, 2, 3, 0, 5, 6, 7, 4, 55, 7, 5, 8, 0);
R(0, 1, 2, 3, 4, 5, 6, 7, 56, 8, 6, 9, 1);
R(3, 0, 1, 2, 7, 4, 5, 6, 57, 9, 7, a, 2);
R(2, 3, 0, 1, 6, 7, 4, 5, 58, a, 8, b, 3);
R(1, 2, 3, 0, 5, 6, 7, 4, 59, b, 9, c, 4);
R(0, 1, 2, 3, 4, 5, 6, 7, 60, c, a, d, 5);
R(3, 0, 1, 2, 7, 4, 5, 6, 61, d, b, e, 6);
R(2, 3, 0, 1, 6, 7, 4, 5, 62, e, c, f, 7);
R(1, 2, 3, 0, 5, 6, 7, 4, 63, f, d, 0, 8);
R(0, 1, 2, 3, 4, 5, 6, 7, 64, 0, e, 1, 9);
R(3, 0, 1, 2, 7, 4, 5, 6, 65, 1, f, 2, a);
R(2, 3, 0, 1, 6, 7, 4, 5, 66, 2, 0, 3, b);
R(1, 2, 3, 0, 5, 6, 7, 4, 67, 3, 1, 4, c);
R(0, 1, 2, 3, 4, 5, 6, 7, 68, 4, 2, 5, d);
R(3, 0, 1, 2, 7, 4, 5, 6, 69, 5, 3, 6, e);
R(2, 3, 0, 1, 6, 7, 4, 5, 70, 6, 4, 7, f);
R(1, 2, 3, 0, 5, 6, 7, 4, 71, 7, 5, 8, 0);
R(0, 1, 2, 3, 4, 5, 6, 7, 72, 8, 6, 9, 1);
R(3, 0, 1, 2, 7, 4, 5, 6, 73, 9, 7, a, 2);
R(2, 3, 0, 1, 6, 7, 4, 5, 74, a, 8, b, 3);
R(1, 2, 3, 0, 5, 6, 7, 4, 75, b, 9, c, 4);
R(0, 1, 2, 3, 4, 5, 6, 7, 76, c, a, d, 5);
R(3, 0, 1, 2, 7, 4, 5, 6, 77, d, b, e, 6);
R(2, 3, 0, 1, 6, 7, 4, 5, 78, e, c, f, 7);
// Round 79: with d = 0 this writes only v.s0 (new "a" + H0).
Rlast(1, 2, 3, 0, 5, 6, 7, 4, 79, f, d, 0, 8);
// Disabled earlier forms of the final round, kept for reference:
/* R(1, 2, 3, 0, 5, 6, 7, 4, 79);*/
//v.s0+=H0;
//v.s0 = sigma1(w[13]) + sigma0(w[0]) + w[15] + w[8] + k79 + v.s4 + Sigma1(v.s5) + Ch(v.s5, v.s6, v.s7) + Maj(v.s1, v.s2, v.s3) + Sigma0(v.s1) +H0;
// CONDITION is an externally supplied expression (not visible here).
return CONDITION;
}
/*
 * Kernel entry point: each work-item tests the candidate base + global id.
 *
 * base:       candidate value assigned to work-item 0.
 * out_buffer: on a hit, the slot indexed by the upper bits of the global id
 *             (id >> 24) receives the lower 24 bits of that id.
 */
__kernel
void kernel_sha384(__const uint64_t base, __global uint32_t * out_buffer) {
    const size_t gid = get_global_id(0);
    const bool hit = sha384_block(base + gid);
    if (hit) {
        out_buffer[gid >> 24] = gid & 0xffffff;
    }
}