In [1]:
%qtconsole --colors=linux
In [4]:
from __future__ import division
import numpy as np
import pyopencl as cl
import pyopencl.array
import pyopencl.tools
In [5]:
%load_ext pyopencl.ipython_ext
The pyopencl.ipython_ext extension is already loaded. To reload it, use:
  %reload_ext pyopencl.ipython_ext
In [6]:
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
In [7]:
device=ctx.devices[0]
print device.max_mem_alloc_size
print device.max_work_group_size
1073741824
256
In [5]:
%%cl_kernel -o "-cl-fast-relaxed-math"

__kernel void get_id(__global int *c, const int x)
{
  int gid = get_global_id(0);
  c[gid] = gid+x;
}
In [6]:
c = cl.array.empty(queue, 100, dtype=np.int32)
In [6]:
 
In [7]:
get_id(queue, (100,), None, c.data, np.int32(33))
Out[7]:
<pyopencl._cl.Event at 0x7f4cc04f7b40>
In [8]:
c.get()
Out[8]:
array([ 33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,
        46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
        59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,
        72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
        85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,
        98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
       111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
       124, 125, 126, 127, 128, 129, 130, 131, 132], dtype=int32)
In [9]:
%%cl_kernel 
__kernel void ComputeHashes(__global uint4 * hashes,
                            __constant char * charset,
                            __constant char * base,
							const uint charsetLength,
                                                        const uint prefixLength,
							const uint plainTextLength)
{
	uint X[16];
	uint id = get_global_id(0);
	int counter = id;
	
	int oc, a = 0, carry = 0;    
	X[0] = 0; X[1] = 0; X[2] = 0; X[3] = 0; 
    X[4] = 0; X[5] = 0; X[6] = 0; X[7] = 0; 
    X[8] = 0; X[9] = 0; X[10] = 0; X[11] = 0; 
    X[12] = 0; X[13] = 0; X[14] = 0; X[15] = 0; 
        for (int i = 0; i < prefixLength; ++i)
        {               
                X[i >> 2] |= base[i] << ((i & 3) << 3);               
        }
	
	for (int i = prefixLength; i < plainTextLength; ++i)
	{
		oc = counter / charsetLength;
        a = base[i] + carry + counter - oc * charsetLength;
		if (a >= charsetLength) { a -= charsetLength; carry = 1; }
		else carry = 0;
        X[i >> 2] |= charset[a] << ((i & 3) << 3);
		counter = oc;
	}
	X[plainTextLength >> 2] |= ((uint)(0x00000080) << ((plainTextLength & 3) << 3));
	
	uint A, B, C, D;
	
#define S(x,n) ((x << n) | ((x & 0xFFFFFFFF) >> (32 - n)))

#define P(a,b,c,d,k,s,t)										\
{																\
	a += F(b,c,d) + X[k] + t; a = S(a,s) + b;					\
}																\

#define P0(a,b,c,d,k,s,t)										\
{																\
	a += F(b,c,d) + t; a = S(a,s) + b;							\
}																\

#define P14(a,b,c,d,k,s,t)										\
{																\
	a += F(b,c,d) + (plainTextLength << 3) + t; a = S(a,s) + b;	\
}																\

    A = 0x67452301;
    B = 0xefcdab89;
    C = 0x98badcfe;
    D = 0x10325476;
	
#define F(x,y,z) (z ^ (x & (y ^ z)))

    P( A, B, C, D,  0,  7, 0xD76AA478 );
    P( D, A, B, C,  1, 12, 0xE8C7B756 );
    P( C, D, A, B,  2, 17, 0x242070DB );
    P( B, C, D, A,  3, 22, 0xC1BDCEEE );
    P( A, B, C, D,  4,  7, 0xF57C0FAF );
    P( D, A, B, C,  5, 12, 0x4787C62A );
    P( C, D, A, B,  6, 17, 0xA8304613 );
    P( B, C, D, A,  7, 22, 0xFD469501 );
    P( A, B, C, D,  8,  7, 0x698098D8 );
    P( D, A, B, C,  9, 12, 0x8B44F7AF );
    P( C, D, A, B, 10, 17, 0xFFFF5BB1 );
    P( B, C, D, A, 11, 22, 0x895CD7BE );
    P( A, B, C, D, 12,  7, 0x6B901122 );
    P( D, A, B, C, 13, 12, 0xFD987193 );
    P14( C, D, A, B, 14, 17, 0xA679438E );
    P( B, C, D, A, 15, 22, 0x49B40821 );

#undef F

#define F(x,y,z) (y ^ (z & (x ^ y)))

    P( A, B, C, D,  1,  5, 0xF61E2562 );
    P( D, A, B, C,  6,  9, 0xC040B340 );
	P( C, D, A, B, 11, 14, 0x265E5A51 );
    P( B, C, D, A,  0, 20, 0xE9B6C7AA );
    P( A, B, C, D,  5,  5, 0xD62F105D );
    P( D, A, B, C, 10,  9, 0x02441453 );
    P( C, D, A, B, 15, 14, 0xD8A1E681 );
    P( B, C, D, A,  4, 20, 0xE7D3FBC8 );
    P( A, B, C, D,  9,  5, 0x21E1CDE6 );
    P14( D, A, B, C, 14,  9, 0xC33707D6 );
    P( C, D, A, B,  3, 14, 0xF4D50D87 );
    P( B, C, D, A,  8, 20, 0x455A14ED );
    P( A, B, C, D, 13,  5, 0xA9E3E905 );
    P( D, A, B, C,  2,  9, 0xFCEFA3F8 );
    P( C, D, A, B,  7, 14, 0x676F02D9 );
    P( B, C, D, A, 12, 20, 0x8D2A4C8A );

#undef F
    
#define F(x,y,z) (x ^ y ^ z)

    P( A, B, C, D,  5,  4, 0xFFFA3942 );
    P( D, A, B, C,  8, 11, 0x8771F681 );
    P( C, D, A, B, 11, 16, 0x6D9D6122 );
    P14( B, C, D, A, 14, 23, 0xFDE5380C );
    P( A, B, C, D,  1,  4, 0xA4BEEA44 );
    P( D, A, B, C,  4, 11, 0x4BDECFA9 );
    P( C, D, A, B,  7, 16, 0xF6BB4B60 );
    P( B, C, D, A, 10, 23, 0xBEBFBC70 );
    P( A, B, C, D, 13,  4, 0x289B7EC6 );
    P( D, A, B, C,  0, 11, 0xEAA127FA );
    P( C, D, A, B,  3, 16, 0xD4EF3085 );
    P( B, C, D, A,  6, 23, 0x04881D05 );
    P( A, B, C, D,  9,  4, 0xD9D4D039 );
    P( D, A, B, C, 12, 11, 0xE6DB99E5 );
    P( C, D, A, B, 15, 16, 0x1FA27CF8 );
    P( B, C, D, A,  2, 23, 0xC4AC5665 );

#undef F

#define F(x,y,z) (y ^ (x | ~z))

    P( A, B, C, D,  0,  6, 0xF4292244 );
    P( D, A, B, C,  7, 10, 0x432AFF97 );
    P14( C, D, A, B, 14, 15, 0xAB9423A7 );
    P( B, C, D, A,  5, 21, 0xFC93A039 );
    P( A, B, C, D, 12,  6, 0x655B59C3 );
    P( D, A, B, C,  3, 10, 0x8F0CCC92 );
    P( C, D, A, B, 10, 15, 0xFFEFF47D );
    P( B, C, D, A,  1, 21, 0x85845DD1 );
    P( A, B, C, D,  8,  6, 0x6FA87E4F );
    P( D, A, B, C, 15, 10, 0xFE2CE6E0 );
    P( C, D, A, B,  6, 15, 0xA3014314 );
    P( B, C, D, A, 13, 21, 0x4E0811A1 );
    P( A, B, C, D,  4,  6, 0xF7537E82 );
    P( D, A, B, C, 11, 10, 0xBD3AF235 );
    P( C, D, A, B,  2, 15, 0x2AD7D2BB );
    P( B, C, D, A,  9, 21, 0xEB86D391 );

#undef F
	
	hashes[id].x = A + 0x67452301;
	hashes[id].y = B + 0xefcdab89;
	hashes[id].z = C + 0x98badcfe;
	hashes[id].w = D + 0x10325476;
}
In [58]:
from time import time
t0=time()
N=100000
hashes = cl.array.empty(queue, N, dtype=cl.array.vec.uint4)
charset_str = "abcefghijklmnopqrstuvwxyz0123456789"
charset_host = np.array(map(ord, charset_str), dtype=np.uint8)
charset = cl.array.to_device(queue, charset_host)
prefixLength = np.uint32(17)
base = cl.array.to_device(queue, np.array([ord('x')]*prefixLength+[0, 0, 0, 0], dtype=np.uint8))
charsetLength = np.uint32(charset.shape[0])
plainTextLength = np.uint32(base.shape[0])
ComputeHashes(queue, (N,), None, hashes.data, charset.data, base.data, charsetLength, prefixLength, plainTextLength).wait()
rtn = hashes.get()
print time()-t0
0.00487613677979
In [71]:
from itertools import product, islice
from hashlib import md5
def hd(n):
    return "".join("%02x"%((x>>i)&0xff) for x in n for i in range(0,32,8))
a= map(hd, rtn) 
In [63]:
t0 = time()
h = (('x'*prefixLength+"".join(reversed(s))) for s in islice(product(*[charset_str ]*4), len(a)))
b = map(lambda x:md5(x).digest(), h)
print time()-t0
1.01328802109
In [6]:
!set PYOPENCL_COMPILER_OUTPUT=1
In [52]:
%%cl_kernel
/*
* This software is Copyright (c) 2012 Myrice <qqlddg at gmail dot com>
* and it is hereby released to the general public under the following terms:
* Redistribution and use in source and binary forms, with or without modification, are permitted.
*/

#ifdef cl_khr_byte_addressable_store
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : disable
#endif

#define uint8_t  unsigned char
#define uint32_t unsigned int
#define uint64_t unsigned long
#define SALT_SIZE 0

#define BINARY_SIZE 8
#define FULL_BINARY_SIZE 64


#define PLAINTEXT_LENGTH 20

#define CIPHERTEXT_LENGTH 128

#define KEYS_PER_CRYPT (1024*512)
#define ITERATIONS 1

#define MIN_KEYS_PER_CRYPT	(KEYS_PER_CRYPT)
#define MAX_KEYS_PER_CRYPT	(ITERATIONS*KEYS_PER_CRYPT)


/// Warning: This version of SWAP64(n) is slow and avoid bugs on AMD GPUs(7970)
#define SWAP64(n)       as_ulong(as_uchar8(n).s76543210) 

/*#define SWAP64(n) \
  (((n) << 56)					\
   | (((n) & 0xff00) << 40)			\
   | (((n) & 0xff0000) << 24)			\
   | (((n) & 0xff000000) << 8)			\
   | (((n) >> 8) & 0xff000000)			\
   | (((n) >> 24) & 0xff0000)			\
   | (((n) >> 40) & 0xff00)			\
   | ((n) >> 56))
   
*/


#define rol(x,n) ((x << n) | (x >> (64-n)))
#define ror(x,n) ((x >> n) | (x << (64-n)))
#define Ch(x,y,z) ((x & y) ^ ( (~x) & z))
#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
#define Sigma0(x) ((ror(x,28))  ^ (ror(x,34)) ^ (ror(x,39)))
#define Sigma1(x) ((ror(x,14))  ^ (ror(x,18)) ^ (ror(x,41)))
#define sigma0(x) ((ror(x,1))  ^ (ror(x,8)) ^(x>>7))
#define sigma1(x) ((ror(x,19)) ^ (ror(x,61)) ^(x>>6))



typedef struct { // notice memory align problem
	uint64_t H[8];
	uint32_t buffer[32];	//1024 bits
	uint32_t buflen;
} sha512_ctx;

typedef struct {
    uint8_t length;
    char v[PLAINTEXT_LENGTH+1];
} sha512_key;


/* Macros for reading/writing chars from int32's */
#define PUTCHAR(buf, index, val) (buf)[(index)>>2] = ((buf)[(index)>>2] & ~(0xffU << (((index) & 3) << 3))) + ((val) << (((index) & 3) << 3))


__constant uint64_t k[] = {
	0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, 0xb5c0fbcfec4d3b2fUL,
	    0xe9b5dba58189dbbcUL,
	0x3956c25bf348b538UL, 0x59f111f1b605d019UL, 0x923f82a4af194f9bUL,
	    0xab1c5ed5da6d8118UL,
	0xd807aa98a3030242UL, 0x12835b0145706fbeUL, 0x243185be4ee4b28cUL,
	    0x550c7dc3d5ffb4e2UL,
	0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL, 0x9bdc06a725c71235UL,
	    0xc19bf174cf692694UL,
	0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL, 0x0fc19dc68b8cd5b5UL,
	    0x240ca1cc77ac9c65UL,
	0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL, 0x5cb0a9dcbd41fbd4UL,
	    0x76f988da831153b5UL,
	0x983e5152ee66dfabUL, 0xa831c66d2db43210UL, 0xb00327c898fb213fUL,
	    0xbf597fc7beef0ee4UL,
	0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL, 0x06ca6351e003826fUL,
	    0x142929670a0e6e70UL,
	0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL, 0x4d2c6dfc5ac42aedUL,
	    0x53380d139d95b3dfUL,
	0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL, 0x81c2c92e47edaee6UL,
	    0x92722c851482353bUL,
	0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL, 0xc24b8b70d0f89791UL,
	    0xc76c51a30654be30UL,
	0xd192e819d6ef5218UL, 0xd69906245565a910UL, 0xf40e35855771202aUL,
	    0x106aa07032bbd1b8UL,
	0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL, 0x2748774cdf8eeb99UL,
	    0x34b0bcb5e19b48a8UL,
	0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL, 0x5b9cca4f7763e373UL,
	    0x682e6ff3d6b2b8a3UL,
	0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL, 0x84c87814a1f0ab72UL,
	    0x8cc702081a6439ecUL,
	0x90befffa23631e28UL, 0xa4506cebde82bde9UL, 0xbef9a3f7b2c67915UL,
	    0xc67178f2e372532bUL,
	0xca273eceea26619cUL, 0xd186b8c721c0c207UL, 0xeada7dd6cde0eb1eUL,
	    0xf57d4f7fee6ed178UL,
	0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL, 0x113f9804bef90daeUL,
	    0x1b710b35131c471bUL,
	0x28db77f523047d84UL, 0x32caab7b40c72493UL, 0x3c9ebe0a15c9bebcUL,
	    0x431d67c49c100d4cUL,
	0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL, 0x5fcb6fab3ad6faecUL,
	    0x6c44198c4a475817UL,
};

inline void sha512(__global const char* password, uint8_t pass_len, 
	__global uint64_t* hash, uint32_t offset)
{
    __private sha512_ctx ctx;
	
	uint32_t* b32 = ctx.buffer;
	//set password to buffer
    for (uint32_t i = 0; i < pass_len; i++) {
		PUTCHAR(b32,i,password[i]);
	}
    ctx.buflen = pass_len;

	//append 1 to ctx buffer
	uint32_t length = ctx.buflen;
	PUTCHAR(b32, length, 0x80);
	while((++length & 3) != 0)  {
		PUTCHAR(b32, length, 0);
	}

	uint32_t* buffer32 = b32+(length>>2);
	for(uint32_t i = length; i < 128; i+=4) {// append 0 to 128
		*buffer32++=0;
	}

	//append length to buffer
	uint64_t *buffer64 = (uint64_t *)ctx.buffer;
	buffer64[15] = SWAP64((uint64_t) ctx.buflen * 8); 

	// sha512 main
	int i;
	
	uint64_t a = 0x6a09e667f3bcc908UL;
	uint64_t b = 0xbb67ae8584caa73bUL;
	uint64_t c = 0x3c6ef372fe94f82bUL;
	uint64_t d = 0xa54ff53a5f1d36f1UL;
	uint64_t e = 0x510e527fade682d1UL;
	uint64_t f = 0x9b05688c2b3e6c1fUL;
	uint64_t g = 0x1f83d9abfb41bd6bUL;
	uint64_t h = 0x5be0cd19137e2179UL;

	__private uint64_t w[16];

	uint64_t *data = (uint64_t *) ctx.buffer;

	#pragma unroll 16
	for (i = 0; i < 16; i++)
		w[i] = SWAP64(data[i]);

	uint64_t t1, t2;
	#pragma unroll 16
	for (i = 0; i < 16; i++) {
		t1 = k[i] + w[i] + h + Sigma1(e) + Ch(e, f, g);
		t2 = Maj(a, b, c) + Sigma0(a);

		h = g;
		g = f;
		f = e;
		e = d + t1;
		d = c;
		c = b;
		b = a;
		a = t1 + t2;
	}
	
	for (i = 16; i < 80; i++) {

		w[i & 15] =sigma1(w[(i - 2) & 15]) + sigma0(w[(i - 15) & 15]) + w[(i -16) & 15] + w[(i - 7) & 15];
		t1 = k[i] + w[i & 15] + h + Sigma1(e) + Ch(e, f, g);
		t2 = Maj(a, b, c) + Sigma0(a);

		h = g;
		g = f;
		f = e;
		e = d + t1;
		d = c;
		c = b;
		b = a;
		a = t1 + t2;
	}
	hash[offset] = SWAP64(a);
}

__kernel void kernel_sha512(
	__global const sha512_key *password, 
	__global uint64_t *hash)
{

    uint32_t idx = get_global_id(0);
	for(uint32_t it = 0; it < ITERATIONS; ++it) {		
		uint32_t offset = idx+it*KEYS_PER_CRYPT;
    	sha512(password[offset].v, password[offset].length, 
			hash, offset);
	}
}

__kernel void kernel_cmp(
	__constant uint64_t* binary, 
	__global uint64_t *hash,
	__global uint32_t* result)
{
    uint32_t idx = get_global_id(0);
	if(idx == 0)
		*result = 0;
	
	for(uint32_t it = 0; it < ITERATIONS; ++it) {		
		uint32_t offset = idx+it*KEYS_PER_CRYPT;
			if (*binary == hash[offset])
				*result = 1;
	}
}
/usr/local/lib/python2.7/dist-packages/pyopencl-2014.1-py2.7-linux-x86_64.egg/pyopencl/__init__.py:59: CompilerWarning: From-source build succeeded, but resulted in non-empty logs:
Build on <pyopencl.Device 'Tahiti' on 'AMD Accelerated Parallel Processing' at 0x29706d0> succeeded, but said:

LOOP UNROLL: pragma unroll (line 162)
    Unrolled as requested!
LOOP UNROLL: pragma unroll (line 167)
    Unrolled as requested!

  warn(text, CompilerWarning)
In [35]:
"""
typedef struct {
    uint8_t length;
    char v[PLAINTEXT_LENGTH+1];
} sha512_key;
"""
sha512_dtype = np.dtype([("length", np.uint8), ("v", 'u1', 21)])
pyopencl.tools.register_dtype(sha512_dtype, 'sha512_key')
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-35-8fb68940bc6e> in <module>()
      6 """
      7 sha512_dtype = np.dtype([("length", np.uint8), ("v", 'u1', 21)])
----> 8 pyopencl.tools.register_dtype(sha512_dtype, 'sha512_key')

/usr/local/lib/python2.7/dist-packages/pyopencl-2014.1-py2.7-linux-x86_64.egg/pyopencl/compyte/dtypes.pyc in register_dtype(dtype, c_names, alias_ok)
    144     if not alias_ok and dtype in DTYPE_TO_NAME:
    145         raise RuntimeError("dtype '%s' already registered (as '%s', new names '%s')"
--> 146                 % (dtype, DTYPE_TO_NAME[dtype], ", ".join(c_names)))
    147 
    148     get_or_register_dtype(c_names, dtype)

RuntimeError: dtype '[('length', 'u1'), ('v', 'u1', (21,))]' already registered (as 'sha512_key', new names 'sha512_key')
In [53]:
from time import time
t0=time()
keys = ["", "bbb", "ccc"]
N=len(keys)
keys_host = np.array([(len(k), map(ord,k)+[0]*(21-len(k)) ) for k in keys], dtype=sha512_dtype)
keys_cl = cl.array.to_device(queue, keys_host)
hashes = cl.array.zeros(queue, N*8, dtype=cl.array.vec.ulong8)
print hashes.get()
rtn = kernel_sha512(queue, (N,), None, keys_cl.data,  hashes.data)
[(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)
 (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)
 (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)
 (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)
 (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)
 (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)
 (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)
 (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)
 (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)
 (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)
 (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)
 (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L) (0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L)]
In [54]:
hex(hashes.get()[0][0])
Out[54]:
'0xb5ef328bcdfa7965L'
In [20]:
from hashlib import sha512
sha512('aaa').hexdigest()
Out[20]:
'd6f644b19812e97b5d871658d6d3400ecd4787faeb9b8990c1e7608288664be77257104a58d033bcf1a0e0945ff06468ebe53e2dff36e248424c7273117dac09'
In [27]:
hex(hashes.get()[0][0])
Out[27]:
'0xf6144929486b971cL'