%qtconsole --colors=linux
from __future__ import division
import numpy as np
import pyopencl as cl
import pyopencl.array
import pyopencl.tools
%load_ext pyopencl.ipython_ext
The pyopencl.ipython_ext extension is already loaded. To reload it, use: %reload_ext pyopencl.ipython_ext
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
device=ctx.devices[0]
print device.max_mem_alloc_size
print device.max_work_group_size
1073741824 256
%%cl_kernel -o "-cl-fast-relaxed-math"
__kernel void get_id(__global int *c, const int x)
{
int gid = get_global_id(0);
c[gid] = gid+x;
}
c = cl.array.empty(queue, 100, dtype=np.int32)
get_id(queue, (100,), None, c.data, np.int32(33))
<pyopencl._cl.Event at 0x7f4cc04f7b40>
c.get()
array([ 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132], dtype=int32)
%%cl_kernel
__kernel void ComputeHashes(__global uint4 * hashes,
__constant char * charset,
__constant char * base,
const uint charsetLength,
const uint prefixLength,
const uint plainTextLength)
{
uint X[16];
uint id = get_global_id(0);
int counter = id;
int oc, a = 0, carry = 0;
X[0] = 0; X[1] = 0; X[2] = 0; X[3] = 0;
X[4] = 0; X[5] = 0; X[6] = 0; X[7] = 0;
X[8] = 0; X[9] = 0; X[10] = 0; X[11] = 0;
X[12] = 0; X[13] = 0; X[14] = 0; X[15] = 0;
for (int i = 0; i < prefixLength; ++i)
{
X[i >> 2] |= base[i] << ((i & 3) << 3);
}
for (int i = prefixLength; i < plainTextLength; ++i)
{
oc = counter / charsetLength;
a = base[i] + carry + counter - oc * charsetLength;
if (a >= charsetLength) { a -= charsetLength; carry = 1; }
else carry = 0;
X[i >> 2] |= charset[a] << ((i & 3) << 3);
counter = oc;
}
X[plainTextLength >> 2] |= ((uint)(0x00000080) << ((plainTextLength & 3) << 3));
uint A, B, C, D;
#define S(x,n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n)))
#define P(a,b,c,d,k,s,t) \
{ \
a += F(b,c,d) + X[k] + t; a = S(a,s) + b; \
} \
#define P0(a,b,c,d,k,s,t) \
{ \
a += F(b,c,d) + t; a = S(a,s) + b; \
} \
#define P14(a,b,c,d,k,s,t) \
{ \
a += F(b,c,d) + (plainTextLength << 3) + t; a = S(a,s) + b; \
} \
A = 0x67452301;
B = 0xefcdab89;
C = 0x98badcfe;
D = 0x10325476;
#define F(x,y,z) (z ^ (x & (y ^ z)))
P( A, B, C, D, 0, 7, 0xD76AA478 );
P( D, A, B, C, 1, 12, 0xE8C7B756 );
P( C, D, A, B, 2, 17, 0x242070DB );
P( B, C, D, A, 3, 22, 0xC1BDCEEE );
P( A, B, C, D, 4, 7, 0xF57C0FAF );
P( D, A, B, C, 5, 12, 0x4787C62A );
P( C, D, A, B, 6, 17, 0xA8304613 );
P( B, C, D, A, 7, 22, 0xFD469501 );
P( A, B, C, D, 8, 7, 0x698098D8 );
P( D, A, B, C, 9, 12, 0x8B44F7AF );
P( C, D, A, B, 10, 17, 0xFFFF5BB1 );
P( B, C, D, A, 11, 22, 0x895CD7BE );
P( A, B, C, D, 12, 7, 0x6B901122 );
P( D, A, B, C, 13, 12, 0xFD987193 );
P14( C, D, A, B, 14, 17, 0xA679438E );
P( B, C, D, A, 15, 22, 0x49B40821 );
#undef F
#define F(x,y,z) (y ^ (z & (x ^ y)))
P( A, B, C, D, 1, 5, 0xF61E2562 );
P( D, A, B, C, 6, 9, 0xC040B340 );
P( C, D, A, B, 11, 14, 0x265E5A51 );
P( B, C, D, A, 0, 20, 0xE9B6C7AA );
P( A, B, C, D, 5, 5, 0xD62F105D );
P( D, A, B, C, 10, 9, 0x02441453 );
P( C, D, A, B, 15, 14, 0xD8A1E681 );
P( B, C, D, A, 4, 20, 0xE7D3FBC8 );
P( A, B, C, D, 9, 5, 0x21E1CDE6 );
P14( D, A, B, C, 14, 9, 0xC33707D6 );
P( C, D, A, B, 3, 14, 0xF4D50D87 );
P( B, C, D, A, 8, 20, 0x455A14ED );
P( A, B, C, D, 13, 5, 0xA9E3E905 );
P( D, A, B, C, 2, 9, 0xFCEFA3F8 );
P( C, D, A, B, 7, 14, 0x676F02D9 );
P( B, C, D, A, 12, 20, 0x8D2A4C8A );
#undef F
#define F(x,y,z) (x ^ y ^ z)
P( A, B, C, D, 5, 4, 0xFFFA3942 );
P( D, A, B, C, 8, 11, 0x8771F681 );
P( C, D, A, B, 11, 16, 0x6D9D6122 );
P14( B, C, D, A, 14, 23, 0xFDE5380C );
P( A, B, C, D, 1, 4, 0xA4BEEA44 );
P( D, A, B, C, 4, 11, 0x4BDECFA9 );
P( C, D, A, B, 7, 16, 0xF6BB4B60 );
P( B, C, D, A, 10, 23, 0xBEBFBC70 );
P( A, B, C, D, 13, 4, 0x289B7EC6 );
P( D, A, B, C, 0, 11, 0xEAA127FA );
P( C, D, A, B, 3, 16, 0xD4EF3085 );
P( B, C, D, A, 6, 23, 0x04881D05 );
P( A, B, C, D, 9, 4, 0xD9D4D039 );
P( D, A, B, C, 12, 11, 0xE6DB99E5 );
P( C, D, A, B, 15, 16, 0x1FA27CF8 );
P( B, C, D, A, 2, 23, 0xC4AC5665 );
#undef F
#define F(x,y,z) (y ^ (x | ~z))
P( A, B, C, D, 0, 6, 0xF4292244 );
P( D, A, B, C, 7, 10, 0x432AFF97 );
P14( C, D, A, B, 14, 15, 0xAB9423A7 );
P( B, C, D, A, 5, 21, 0xFC93A039 );
P( A, B, C, D, 12, 6, 0x655B59C3 );
P( D, A, B, C, 3, 10, 0x8F0CCC92 );
P( C, D, A, B, 10, 15, 0xFFEFF47D );
P( B, C, D, A, 1, 21, 0x85845DD1 );
P( A, B, C, D, 8, 6, 0x6FA87E4F );
P( D, A, B, C, 15, 10, 0xFE2CE6E0 );
P( C, D, A, B, 6, 15, 0xA3014314 );
P( B, C, D, A, 13, 21, 0x4E0811A1 );
P( A, B, C, D, 4, 6, 0xF7537E82 );
P( D, A, B, C, 11, 10, 0xBD3AF235 );
P( C, D, A, B, 2, 15, 0x2AD7D2BB );
P( B, C, D, A, 9, 21, 0xEB86D391 );
#undef F
hashes[id].x = A + 0x67452301;
hashes[id].y = B + 0xefcdab89;
hashes[id].z = C + 0x98badcfe;
hashes[id].w = D + 0x10325476;
}
from time import time
t0=time()
N=100000
hashes = cl.array.empty(queue, N, dtype=cl.array.vec.uint4)
charset_str = "abcefghijklmnopqrstuvwxyz0123456789"
charset_host = np.array(map(ord, charset_str), dtype=np.uint8)
charset = cl.array.to_device(queue, charset_host)
prefixLength = np.uint32(17)
base = cl.array.to_device(queue, np.array([ord('x')]*prefixLength+[0, 0, 0, 0], dtype=np.uint8))
charsetLength = np.uint32(charset.shape[0])
plainTextLength = np.uint32(base.shape[0])
ComputeHashes(queue, (N,), None, hashes.data, charset.data, base.data, charsetLength, prefixLength, plainTextLength).wait()
rtn = hashes.get()
print time()-t0
0.00487613677979
from itertools import product, islice
from hashlib import md5
def hd(n):
return "".join("%02x"%((x>>i)&0xff) for x in n for i in range(0,32,8))
a= map(hd, rtn)
t0 = time()
h = (('x'*prefixLength+"".join(reversed(s))) for s in islice(product(*[charset_str ]*4), len(a)))
b = map(lambda x:md5(x).digest(), h)
print time()-t0
1.01328802109
!set PYOPENCL_COMPILER_OUTPUT=1
%%cl_kernel
/*
* This software is Copyright (c) 2012 Myrice <qqlddg at gmail dot com>
* and it is hereby released to the general public under the following terms:
* Redistribution and use in source and binary forms, with or without modification, are permitted.
*/
#ifdef cl_khr_byte_addressable_store
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : disable
#endif
#define uint8_t unsigned char
#define uint32_t unsigned int
#define uint64_t unsigned long
#define SALT_SIZE 0
#define BINARY_SIZE 8
#define FULL_BINARY_SIZE 64
#define PLAINTEXT_LENGTH 20
#define CIPHERTEXT_LENGTH 128
#define KEYS_PER_CRYPT (1024*512)
#define ITERATIONS 1
#define MIN_KEYS_PER_CRYPT (KEYS_PER_CRYPT)
#define MAX_KEYS_PER_CRYPT (ITERATIONS*KEYS_PER_CRYPT)
/// Warning: This version of SWAP64(n) is slow and avoid bugs on AMD GPUs(7970)
#define SWAP64(n) as_ulong(as_uchar8(n).s76543210)
/*#define SWAP64(n) \
(((n) << 56) \
| (((n) & 0xff00) << 40) \
| (((n) & 0xff0000) << 24) \
| (((n) & 0xff000000) << 8) \
| (((n) >> 8) & 0xff000000) \
| (((n) >> 24) & 0xff0000) \
| (((n) >> 40) & 0xff00) \
| ((n) >> 56))
*/
#define rol(x,n) ((x << n) | (x >> (64-n)))
#define ror(x,n) ((x >> n) | (x << (64-n)))
#define Ch(x,y,z) ((x & y) ^ ( (~x) & z))
#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
#define Sigma0(x) ((ror(x,28)) ^ (ror(x,34)) ^ (ror(x,39)))
#define Sigma1(x) ((ror(x,14)) ^ (ror(x,18)) ^ (ror(x,41)))
#define sigma0(x) ((ror(x,1)) ^ (ror(x,8)) ^(x>>7))
#define sigma1(x) ((ror(x,19)) ^ (ror(x,61)) ^(x>>6))
typedef struct { // notice memory align problem
uint64_t H[8];
uint32_t buffer[32]; //1024 bits
uint32_t buflen;
} sha512_ctx;
typedef struct {
uint8_t length;
char v[PLAINTEXT_LENGTH+1];
} sha512_key;
/* Macros for reading/writing chars from int32's */
#define PUTCHAR(buf, index, val) (buf)[(index)>>2] = ((buf)[(index)>>2] & ~(0xffU << (((index) & 3) << 3))) + ((val) << (((index) & 3) << 3))
__constant uint64_t k[] = {
0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, 0xb5c0fbcfec4d3b2fUL,
0xe9b5dba58189dbbcUL,
0x3956c25bf348b538UL, 0x59f111f1b605d019UL, 0x923f82a4af194f9bUL,
0xab1c5ed5da6d8118UL,
0xd807aa98a3030242UL, 0x12835b0145706fbeUL, 0x243185be4ee4b28cUL,
0x550c7dc3d5ffb4e2UL,
0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL, 0x9bdc06a725c71235UL,
0xc19bf174cf692694UL,
0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL, 0x0fc19dc68b8cd5b5UL,
0x240ca1cc77ac9c65UL,
0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL, 0x5cb0a9dcbd41fbd4UL,
0x76f988da831153b5UL,
0x983e5152ee66dfabUL, 0xa831c66d2db43210UL, 0xb00327c898fb213fUL,
0xbf597fc7beef0ee4UL,
0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL, 0x06ca6351e003826fUL,
0x142929670a0e6e70UL,
0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL, 0x4d2c6dfc5ac42aedUL,
0x53380d139d95b3dfUL,
0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL, 0x81c2c92e47edaee6UL,
0x92722c851482353bUL,
0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL, 0xc24b8b70d0f89791UL,
0xc76c51a30654be30UL,
0xd192e819d6ef5218UL, 0xd69906245565a910UL, 0xf40e35855771202aUL,
0x106aa07032bbd1b8UL,
0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL, 0x2748774cdf8eeb99UL,
0x34b0bcb5e19b48a8UL,
0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL, 0x5b9cca4f7763e373UL,
0x682e6ff3d6b2b8a3UL,
0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL, 0x84c87814a1f0ab72UL,
0x8cc702081a6439ecUL,
0x90befffa23631e28UL, 0xa4506cebde82bde9UL, 0xbef9a3f7b2c67915UL,
0xc67178f2e372532bUL,
0xca273eceea26619cUL, 0xd186b8c721c0c207UL, 0xeada7dd6cde0eb1eUL,
0xf57d4f7fee6ed178UL,
0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL, 0x113f9804bef90daeUL,
0x1b710b35131c471bUL,
0x28db77f523047d84UL, 0x32caab7b40c72493UL, 0x3c9ebe0a15c9bebcUL,
0x431d67c49c100d4cUL,
0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL, 0x5fcb6fab3ad6faecUL,
0x6c44198c4a475817UL,
};
inline void sha512(__global const char* password, uint8_t pass_len,
__global uint64_t* hash, uint32_t offset)
{
__private sha512_ctx ctx;
uint32_t* b32 = ctx.buffer;
//set password to buffer
for (uint32_t i = 0; i < pass_len; i++) {
PUTCHAR(b32,i,password[i]);
}
ctx.buflen = pass_len;
//append 1 to ctx buffer
uint32_t length = ctx.buflen;
PUTCHAR(b32, length, 0x80);
while((++length & 3) != 0) {
PUTCHAR(b32, length, 0);
}
uint32_t* buffer32 = b32+(length>>2);
for(uint32_t i = length; i < 128; i+=4) {// append 0 to 128
*buffer32++=0;
}
//append length to buffer
uint64_t *buffer64 = (uint64_t *)ctx.buffer;
buffer64[15] = SWAP64((uint64_t) ctx.buflen * 8);
// sha512 main
int i;
uint64_t a = 0x6a09e667f3bcc908UL;
uint64_t b = 0xbb67ae8584caa73bUL;
uint64_t c = 0x3c6ef372fe94f82bUL;
uint64_t d = 0xa54ff53a5f1d36f1UL;
uint64_t e = 0x510e527fade682d1UL;
uint64_t f = 0x9b05688c2b3e6c1fUL;
uint64_t g = 0x1f83d9abfb41bd6bUL;
uint64_t h = 0x5be0cd19137e2179UL;
__private uint64_t w[16];
uint64_t *data = (uint64_t *) ctx.buffer;
#pragma unroll 16
for (i = 0; i < 16; i++)
w[i] = SWAP64(data[i]);
uint64_t t1, t2;
#pragma unroll 16
for (i = 0; i < 16; i++) {
t1 = k[i] + w[i] + h + Sigma1(e) + Ch(e, f, g);
t2 = Maj(a, b, c) + Sigma0(a);
h = g;
g = f;
f = e;
e = d + t1;
d = c;
c = b;
b = a;
a = t1 + t2;
}
for (i = 16; i < 80; i++) {
w[i & 15] =sigma1(w[(i - 2) & 15]) + sigma0(w[(i - 15) & 15]) + w[(i -16) & 15] + w[(i - 7) & 15];
t1 = k[i] + w[i & 15] + h + Sigma1(e) + Ch(e, f, g);
t2 = Maj(a, b, c) + Sigma0(a);
h = g;
g = f;
f = e;
e = d + t1;
d = c;
c = b;
b = a;
a = t1 + t2;
}
hash[offset] = SWAP64(a);
}
__kernel void kernel_sha512(
__global const sha512_key *password,
__global uint64_t *hash)
{
uint32_t idx = get_global_id(0);
for(uint32_t it = 0; it < ITERATIONS; ++it) {
uint32_t offset = idx+it*KEYS_PER_CRYPT;
sha512(password[offset].v, password[offset].length,
hash, offset);
}
}
__kernel void kernel_cmp(
__constant uint64_t* binary,
__global uint64_t *hash,
__global uint32_t* result)
{
uint32_t idx = get_global_id(0);
if(idx == 0)
*result = 0;
for(uint32_t it = 0; it < ITERATIONS; ++it) {
uint32_t offset = idx+it*KEYS_PER_CRYPT;
if (*binary == hash[offset])
*result = 1;
}
}
/usr/local/lib/python2.7/dist-packages/pyopencl-2014.1-py2.7-linux-x86_64.egg/pyopencl/__init__.py:59: CompilerWarning: From-source build succeeded, but resulted in non-empty logs: Build on <pyopencl.Device 'Tahiti' on 'AMD Accelerated Parallel Processing' at 0x29706d0> succeeded, but said: LOOP UNROLL: pragma unroll (line 162) Unrolled as requested! LOOP UNROLL: pragma unroll (line 167) Unrolled as requested! warn(text, CompilerWarning)
"""
typedef struct {
uint8_t length;
char v[PLAINTEXT_LENGTH+1];
} sha512_key;
"""
sha512_dtype = np.dtype([("length", np.uint8), ("v", 'u1', 21)])
pyopencl.tools.register_dtype(sha512_dtype, 'sha512_key')
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) <ipython-input-35-8fb68940bc6e> in <module>() 6 """ 7 sha512_dtype = np.dtype([("length", np.uint8), ("v", 'u1', 21)]) ----> 8 pyopencl.tools.register_dtype(sha512_dtype, 'sha512_key') /usr/local/lib/python2.7/dist-packages/pyopencl-2014.1-py2.7-linux-x86_64.egg/pyopencl/compyte/dtypes.pyc in register_dtype(dtype, c_names, alias_ok) 144 if not alias_ok and dtype in DTYPE_TO_NAME: 145 raise RuntimeError("dtype '%s' already registered (as '%s', new names '%s')" --> 146 % (dtype, DTYPE_TO_NAME[dtype], ", ".join(c_names))) 147 148 get_or_register_dtype(c_names, dtype) RuntimeError: dtype '[('length', 'u1'), ('v', 'u1', (21,))]' already registered (as 'sha512_key', new names 'sha512_key')
from time import time
t0=time()
keys = ["", "bbb", "ccc"]
N=len(keys)
keys_host = np.array([(len(k), map(ord,k)+[0]*(21-len(k)) ) for k in keys], dtype=sha512_dtype)
keys_cl = cl.array.to_device(queue, keys_host)
hashes = cl.array.zeros(queue, N*8, dtype=cl.array.vec.ulong8)
print hashes.get()
rtn = kernel_sha512(queue, (N,), None, keys_cl.data, hashes.data)
[(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)]
hex(hashes.get()[0][0])
'0xb5ef328bcdfa7965L'
from hashlib import sha512
sha512('aaa').hexdigest()
'd6f644b19812e97b5d871658d6d3400ecd4787faeb9b8990c1e7608288664be77257104a58d033bcf1a0e0945ff06468ebe53e2dff36e248424c7273117dac09'
hex(hashes.get()[0][0])
'0xf6144929486b971cL'