In [2]:
from IPython.html.widgets import *
from IPython.display import display
from collections import Counter
from itertools import combinations, chain
from hashlib import sha384
from time import time
from sys import stdout
In [3]:
# Text widget that the search loop in this cell updates with its progress.
text = TextWidget()
display(text)
# Source template for a.c.  Every 'X' marks an optional position: the search
# loop in this cell replaces each 'X' with either a single space or nothing,
# so the visible text stays the same while its SHA-384 digest changes.
# Whitespace inside the literal is significant (it is part of what is hashed).
s0 = """/*X
It doesn't matter you use vim, emacs, or echo.X
Real programmers use one-way hash functions to compile their source code.X
This file can be compiled to executable binary by gcc or SHA384.X
To get this file, simplyX 
$ wget https://raw.githubusercontent.com/tjwei/tjw_ipynb/master/a.cX
To compile this file, eitherX
$ sha384sum a.cX|Xxxd -r -pX>Xa.outX&&Xchmod a+x a.outX
orX
$ gcc a.cX
Then tryX
$ ./a.outX
 to execute the binary.X
*/X
int main(int argc,Xchar **argv)X
{X
Xreturn 0;X
}X
X"""
L = Counter(s0)['X']
print "L=", L
all_comb = chain( *[combinations(range(L), r) for r in range(L+1)] )
iters = 0
for iters, idx in enumerate(all_comb):
        x=[0]*L
        for i in idx:
            x[i]=1
        s=s0.replace('X', '%s')%tuple(' '*x[i] for i in range(L))
        d = sha384(s).digest()
        if iters%10000 ==0:
            text.value="iters={} progress={:5.2f}%".format(iters,iters*100.0/2**L)
        if d.startswith('\n#') and d.rindex('\n')==0:                    
            print x
            print s.replace(' ', '.')
            with open('a.c', 'w') as f:
                f.write(s)
            break
print "done", sum(x)
L= 27
[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1]
/*
It.doesn't.matter.you.use.vim,.emacs,.or.echo.
Real.programmers.use.one-way.hash.functions.to.compile.their.source.code.
This.file.can.be.compiled.to.executable.binary.by.gcc.or.SHA384.
To.get.this.file,.simply.
$.wget.https://raw.githubusercontent.com/tjwei/tjw_ipynb/master/a.c.
To.compile.this.file,.either
$.sha384sum.a.c|xxd.-r.-p>a.out.&&chmod.a+x.a.out
or
$.gcc.a.c
Then.try
$../a.out
.to.execute.the.binary..
*/
int.main(int.argc,char.**argv)
{
return.0;
}
.
done 4
In [2]:
from IPython.parallel import Client
rc = Client()
view = rc[:]
from sys import stdout
from collections import Counter
from itertools import combinations, chain
from more_itertools import chunked
from IPython.display import display
from IPython.html.widgets import *
from time import time
In [3]:
%%px
from hashlib import sha384, md5, sha1
def check_hash(start):
    """Test the candidates start .. start+step-1 and return the first hit.

    Bit i of a candidate number decides whether slot i of the template gets a
    single space (bit set) or nothing (bit clear).  A candidate is a hit when
    its SHA-384 digest starts with ``src`` and the last newline in the digest
    sits at index ``first_n``.  The winning text is written to '0.c' and
    returned; when nothing in the range matches the result is None.

    Reads the globals step, s0, L, src and first_n, which the controller
    pushes to the engines.
    """
    candidate = start
    stop = start + step
    while candidate < stop:
        fillers = tuple(' ' if (candidate >> slot) & 1 else '' for slot in range(L))
        source = s0 % fillers
        digest = sha384(source).digest()
        if digest.startswith(src) and digest.rindex('\n') == first_n:
            with open('0.c', 'w') as out:
                out.write(source)
            return source
        candidate += 1
    return None
In [4]:
# Template for 0.c (the file name check_hash writes on a hit).  Each 'X' is a
# slot: later in this cell every 'X' becomes '%s', and check_hash fills each
# slot with either one space or nothing.  Whitespace inside the literal is
# significant because the whole text is what gets hashed.
s0="""#include <stdio.h>X/*X
echo "Hello world!"X<< } &&XexitX
X
It doesn't matter you use vim, emacs, or echo.X
Real programmers use one-way functions to compile programs.X
X
$ sh 0.cX
$ gcc 0.cX&&X./a.outX
$ sha384sum 0.cX|Xxxd -r -pX>Xa.outX&&Xchmod a+x a.outX
$ ./a.outX
X
sha384 was chosen because we can fit an ELF in the digest!(48>45)X
X*/X
main()X{X
Xputs("Hello world!");X
}
X"""
step = 1000
src = '. *\n'
L = Counter(s0)['X']
s0 = s0.replace('X', '%s')
first_n = src.index('\n') 
view.push(dict(src=src, step=step, s0=s0, L=L, first_n=first_n))
t0 = time()
expected_number = 256**len(src)
print "L=", L
stdout.flush()
text = TextWidget()
display(text)
for chunk in chunked(xrange(0, 2**L, step), 500):
    result = view.map_sync(lambda i:check_hash(i), chunk)
    if any(result):
        break
    i = chunk[0]
    elapsed = (time()-t0)/60
    percent = i*100.0/expected_number
    exp =  0 if i==0 else 100.0*elapsed/percent/60.0/24.0
    text.value = "{} {:5.2f}% {:.1f}min EXP={:.2f}days".format(i, percent, elapsed, exp)
print "done"
L= 30
done
In [2]:
%%px
from hashlib import sha384, md5, sha1
table = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_."
def check_hash(start):
    """Test the candidates start .. start+step-1 and return the first hit.

    A candidate number is read as L base-64 digits, least significant digit
    first; digit i selects the character from ``table`` that fills slot i of
    the template.  A candidate is a hit when its SHA-384 digest starts with
    ``src`` and the last newline in the digest sits at index ``first_n``.
    The winning text is written to '0.c' and returned; when nothing in the
    range matches the result is None.

    Reads the globals step, s0, L, src, first_n (pushed by the controller)
    and table.
    """
    for candidate in range(start, start + step):
        remaining = candidate
        fillers = []
        for _ in range(L):
            fillers.append(table[remaining & 63])
            remaining >>= 6
        source = s0 % tuple(fillers)
        digest = sha384(source).digest()
        if not digest.startswith(src):
            continue
        if digest.rindex('\n') == first_n:
            with open('0.c', 'w') as out:
                out.write(source)
            return source
    return None
In [ ]:
# Template for the table-based search: each 'X' would become a '%s' slot that
# check_hash fills with one character out of its 64-symbol table.
# NOTE(review): this saved template contains no 'X' marker, so L evaluates to
# 0 and the loop below covers a single start value.  The recorded output of
# this cell shows "L= 7", so the template that was actually run presumably
# differed from the one saved here -- confirm before re-running.
s0 = """#define echo main(){puts(
#define exit );}
echo "Hello world!"
exit
"""
# Candidates handled by one check_hash() call on an engine.
step = 1000
# Bytes the digest has to start with.
src = 'sh *\n#'
# Number of slots, then turn the markers into '%s' placeholders.
L = Counter(s0)['X']
s0 = s0.replace('X', '%s')
# Index of the newline inside src; check_hash requires the digest's last
# newline to be at this index.
first_n = src.index('\n') 
# Make the values check_hash reads as globals available on the engines.
view.push(dict(src=src, step=step, s0=s0, L=L, first_n=first_n))
t0 = time()
# Number of distinct values of the first len(src) digest bytes; progress is
# shown relative to this count, not relative to the size of the search space.
expected_number = 256**len(src)
print "L=", L
stdout.flush()
text = TextWidget()
display(text)
# 6 bits per slot (64 table entries), hence 2**(6*L) candidates in total.
for chunk in chunked(xrange(0, 2**(6*L), step), 500):
    # check_hash exists only on the engines; the lambda defers the name lookup.
    result = view.map_sync(lambda i:check_hash(i), chunk)
    if any(result):
        break
    i = chunk[0]
    elapsed = (time()-t0)/60  # minutes
    percent = i*100.0/expected_number
    # Extrapolated total time in days (minutes -> hours -> days).
    exp =  0 if i==0 else 100.0*elapsed/percent/60.0/24.0
    text.value = "{} {:5.2f}% {:.1f}min EXP={:.2f}days".format(i, percent, elapsed, exp)
print "done"
L= 7
In [19]:
# OPENCL VERSION STARTS HERE
from collections import Counter
# Template used by the OpenCL search.  Each 'X' marks a position that holds
# either one space or nothing in a candidate; the header-generation cell
# splits the text at these markers.  Whitespace inside the literal is
# significant because the text is what gets hashed.
s0="""#include <stdio.h>X/*X
echo "Hello world!"X<< '}'X&&XexitX
X
It doesn't matter you use vim, emacs, or echo.X
Real programmers use one-way functions to compile programs.X
X
Usage:X
$ mkdir testX
$ cd testX
$ wget https://raw.githubusercontent.com/tjwei/tjw_ipynb/master/0.cX
$ sh 0.cX
Hello World!X
$ gcc 0.cX&&X./a.outX
Hello World!X
$ shasum -a 384 0.cX|Xxxd -r -pX>Xa.outX&&Xchmod a+x a.outX
$ ./a.outX
Hello World!X
X
sha384 was chosenX
because we can fit an ELF in the digest!(48>45)X
X*/X
main()X{X
Xputs("Hello world!");X
}
X"""
# Cell result: (template length in characters, number of 'X' markers).
len(s0), Counter(s0)['X']
Out[19]:
(531, 39)
In [20]:
from more_itertools import chunked
sp = s0.split('X')[:-1]
prefix = '. *\n'
s0_ord = map(ord, s0.replace('X', ' '))
with open("opencl_realman.h" ,"w") as f:    
    print >>f, "__constant uint8_t SRC[%d]={"%len(s0)+ ",\n".join(str(x)[1:-1] for x in chunked(s0_ord, 20))+"};"
    pos = 0
    print >>f, "#define UPDATE \\"
    xx = 1
    for l in sp:
        L=len(l)+1
        print >>f, "ctx_update(ctx, SRC+%d, (x&0x%xUL) ? %d : %d);\\"%(pos, xx, L, L-1)
        pos=pos+L
        xx<<=1
    print >>f
    prefix_hex=prefix_mask=0
    for i, c in enumerate(prefix):
        offset = 56-8*i
        prefix_hex |= ord(c)<<offset
        prefix_mask |= 0xff<<offset
    print >>f, "#define CONDITION (ctx->H[0]&0x%xUL) == 0x%xUL"%(prefix_mask, prefix_hex)
with open("opencl_realman.h" ,"r") as f:
    print f.read()
__constant uint8_t SRC[531]={35, 105, 110, 99, 108, 117, 100, 101, 32, 60, 115, 116, 100, 105, 111, 46, 104, 62, 32, 47,
42, 32, 10, 101, 99, 104, 111, 32, 34, 72, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100,
33, 34, 32, 60, 60, 32, 39, 125, 39, 32, 38, 38, 32, 101, 120, 105, 116, 32, 10, 32,
10, 73, 116, 32, 100, 111, 101, 115, 110, 39, 116, 32, 109, 97, 116, 116, 101, 114, 32, 121,
111, 117, 32, 117, 115, 101, 32, 118, 105, 109, 44, 32, 101, 109, 97, 99, 115, 44, 32, 111,
114, 32, 101, 99, 104, 111, 46, 32, 10, 82, 101, 97, 108, 32, 112, 114, 111, 103, 114, 97,
109, 109, 101, 114, 115, 32, 117, 115, 101, 32, 111, 110, 101, 45, 119, 97, 121, 32, 102, 117,
110, 99, 116, 105, 111, 110, 115, 32, 116, 111, 32, 99, 111, 109, 112, 105, 108, 101, 32, 112,
114, 111, 103, 114, 97, 109, 115, 46, 32, 10, 32, 10, 85, 115, 97, 103, 101, 58, 32, 10,
36, 32, 109, 107, 100, 105, 114, 32, 116, 101, 115, 116, 32, 10, 36, 32, 99, 100, 32, 116,
101, 115, 116, 32, 10, 36, 32, 119, 103, 101, 116, 32, 104, 116, 116, 112, 115, 58, 47, 47,
114, 97, 119, 46, 103, 105, 116, 104, 117, 98, 117, 115, 101, 114, 99, 111, 110, 116, 101, 110,
116, 46, 99, 111, 109, 47, 116, 106, 119, 101, 105, 47, 116, 106, 119, 95, 105, 112, 121, 110,
98, 47, 109, 97, 115, 116, 101, 114, 47, 48, 46, 99, 32, 10, 36, 32, 115, 104, 32, 48,
46, 99, 32, 10, 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 32, 10, 36, 32,
103, 99, 99, 32, 48, 46, 99, 32, 38, 38, 32, 46, 47, 97, 46, 111, 117, 116, 32, 10,
72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 32, 10, 36, 32, 115, 104, 97, 115,
117, 109, 32, 45, 97, 32, 51, 56, 52, 32, 48, 46, 99, 32, 124, 32, 120, 120, 100, 32,
45, 114, 32, 45, 112, 32, 62, 32, 97, 46, 111, 117, 116, 32, 38, 38, 32, 99, 104, 109,
111, 100, 32, 97, 43, 120, 32, 97, 46, 111, 117, 116, 32, 10, 36, 32, 46, 47, 97, 46,
111, 117, 116, 32, 10, 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 32, 10, 32,
10, 115, 104, 97, 51, 56, 52, 32, 119, 97, 115, 32, 99, 104, 111, 115, 101, 110, 32, 10,
98, 101, 99, 97, 117, 115, 101, 32, 119, 101, 32, 99, 97, 110, 32, 102, 105, 116, 32, 97,
110, 32, 69, 76, 70, 32, 105, 110, 32, 116, 104, 101, 32, 100, 105, 103, 101, 115, 116, 33,
40, 52, 56, 62, 52, 53, 41, 32, 10, 32, 42, 47, 32, 10, 109, 97, 105, 110, 40, 41,
32, 123, 32, 10, 32, 112, 117, 116, 115, 40, 34, 72, 101, 108, 108, 111, 32, 119, 111, 114,
108, 100, 33, 34, 41, 59, 32, 10, 125, 10, 32};
#define UPDATE \
ctx_update(ctx, SRC+0, (x&0x1UL) ? 19 : 18);\
ctx_update(ctx, SRC+19, (x&0x2UL) ? 3 : 2);\
ctx_update(ctx, SRC+22, (x&0x4UL) ? 21 : 20);\
ctx_update(ctx, SRC+43, (x&0x8UL) ? 7 : 6);\
ctx_update(ctx, SRC+50, (x&0x10UL) ? 3 : 2);\
ctx_update(ctx, SRC+53, (x&0x20UL) ? 5 : 4);\
ctx_update(ctx, SRC+58, (x&0x40UL) ? 2 : 1);\
ctx_update(ctx, SRC+60, (x&0x80UL) ? 48 : 47);\
ctx_update(ctx, SRC+108, (x&0x100UL) ? 61 : 60);\
ctx_update(ctx, SRC+169, (x&0x200UL) ? 2 : 1);\
ctx_update(ctx, SRC+171, (x&0x400UL) ? 8 : 7);\
ctx_update(ctx, SRC+179, (x&0x800UL) ? 14 : 13);\
ctx_update(ctx, SRC+193, (x&0x1000UL) ? 11 : 10);\
ctx_update(ctx, SRC+204, (x&0x2000UL) ? 69 : 68);\
ctx_update(ctx, SRC+273, (x&0x4000UL) ? 10 : 9);\
ctx_update(ctx, SRC+283, (x&0x8000UL) ? 14 : 13);\
ctx_update(ctx, SRC+297, (x&0x10000UL) ? 11 : 10);\
ctx_update(ctx, SRC+308, (x&0x20000UL) ? 3 : 2);\
ctx_update(ctx, SRC+311, (x&0x40000UL) ? 8 : 7);\
ctx_update(ctx, SRC+319, (x&0x80000UL) ? 14 : 13);\
ctx_update(ctx, SRC+333, (x&0x100000UL) ? 21 : 20);\
ctx_update(ctx, SRC+354, (x&0x200000UL) ? 2 : 1);\
ctx_update(ctx, SRC+356, (x&0x400000UL) ? 10 : 9);\
ctx_update(ctx, SRC+366, (x&0x800000UL) ? 2 : 1);\
ctx_update(ctx, SRC+368, (x&0x1000000UL) ? 6 : 5);\
ctx_update(ctx, SRC+374, (x&0x2000000UL) ? 3 : 2);\
ctx_update(ctx, SRC+377, (x&0x4000000UL) ? 16 : 15);\
ctx_update(ctx, SRC+393, (x&0x8000000UL) ? 11 : 10);\
ctx_update(ctx, SRC+404, (x&0x10000000UL) ? 14 : 13);\
ctx_update(ctx, SRC+418, (x&0x20000000UL) ? 2 : 1);\
ctx_update(ctx, SRC+420, (x&0x40000000UL) ? 19 : 18);\
ctx_update(ctx, SRC+439, (x&0x80000000UL) ? 49 : 48);\
ctx_update(ctx, SRC+488, (x&0x100000000UL) ? 2 : 1);\
ctx_update(ctx, SRC+490, (x&0x200000000UL) ? 3 : 2);\
ctx_update(ctx, SRC+493, (x&0x400000000UL) ? 8 : 7);\
ctx_update(ctx, SRC+501, (x&0x800000000UL) ? 2 : 1);\
ctx_update(ctx, SRC+503, (x&0x1000000000UL) ? 2 : 1);\
ctx_update(ctx, SRC+505, (x&0x2000000000UL) ? 22 : 21);\
ctx_update(ctx, SRC+527, (x&0x4000000000UL) ? 4 : 3);\

#define CONDITION (ctx->H[0]&0xffffffff00000000UL) == 0x2e202a0a00000000UL

In [21]:
from __future__ import division
import numpy as np
import pyopencl as cl
import pyopencl.array
import pyopencl.tools
%load_ext pyopencl.ipython_ext
The pyopencl.ipython_ext extension is already loaded. To reload it, use:
  %reload_ext pyopencl.ipython_ext
In [22]:
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
device=ctx.devices[0]
print device.max_mem_alloc_size
print device.max_work_group_size
1073741824
256
In [23]:
%%cl_kernel -o "-I ."
#define _OPENCL_COMPILER
#define DEVICE_INFO 2|64|1024
#include "opencl_cryptsha512.h"
#include "opencl_realman.h"
/*
 * Table of 80 64-bit round constants; sha512_block() adds k[i] in round i.
 * The values are the SHA-384/SHA-512 constants K[0..79] of FIPS 180-4.
 */
__constant uint64_t k[] = {
    0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL,
    0x3956c25bf348b538UL, 0x59f111f1b605d019UL, 0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL,
    0xd807aa98a3030242UL, 0x12835b0145706fbeUL, 0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL,
    0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL, 0x9bdc06a725c71235UL, 0xc19bf174cf692694UL,
    0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL, 0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL,
    0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL, 0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL,
    0x983e5152ee66dfabUL, 0xa831c66d2db43210UL, 0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL,
    0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL, 0x06ca6351e003826fUL, 0x142929670a0e6e70UL,
    0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL, 0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL,
    0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL, 0x81c2c92e47edaee6UL, 0x92722c851482353bUL,
    0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL, 0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL,
    0xd192e819d6ef5218UL, 0xd69906245565a910UL, 0xf40e35855771202aUL, 0x106aa07032bbd1b8UL,
    0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL, 0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL,
    0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL, 0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL,
    0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL, 0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL,
    0x90befffa23631e28UL, 0xa4506cebde82bde9UL, 0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL,
    0xca273eceea26619cUL, 0xd186b8c721c0c207UL, 0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL,
    0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL, 0x113f9804bef90daeUL, 0x1b710b35131c471bUL,
    0x28db77f523047d84UL, 0x32caab7b40c72493UL, 0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL,
    0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL, 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL,
};


/*
 * Reset a hashing context for a fresh SHA-384 computation: load the eight
 * initial state words and clear both the byte counter and the buffer fill
 * level.  The buffer contents themselves are not cleared here.
 */
inline void init_ctx384(__local sha512_ctx * ctx) {
    /* SHA-384 initial hash values H(0) as listed in FIPS 180-4. */
    const uint64_t initial_state[8] = {
        0xcbbb9d5dc1059ed8UL, 0x629a292a367cd507UL,
        0x9159015a3070dd17UL, 0x152fecd8f70e5939UL,
        0x67332667ffc00b31UL, 0x8eb44a8768581511UL,
        0xdb0c2e0d64f98fa7UL, 0x47b5481dbefa4fa4UL
    };

    for (int word = 0; word < 8; word++)
        ctx->H[word] = initial_state[word];

    ctx->buflen = 0;
    ctx->total = 0;
}

/*
 * Run the 80-round compression function over the 128-byte block currently
 * held in ctx->buffer and add the result into ctx->H[0..7].
 *
 * Reads ctx->H and ctx->buffer and writes only ctx->H; ctx->buflen and
 * ctx->total are not touched here.
 *
 * SWAP64, SWAP64_V, sigma0/sigma1, Sigma0/Sigma1, Ch and Maj are not defined
 * in this cell; presumably they come from opencl_cryptsha512.h -- confirm.
 */
inline void sha512_block(__local sha512_ctx * ctx) {
    /* Working variables start out as a copy of the current state. */
    uint64_t a = ctx->H[0];
    uint64_t b = ctx->H[1];
    uint64_t c = ctx->H[2];
    uint64_t d = ctx->H[3];
    uint64_t e = ctx->H[4];
    uint64_t f = ctx->H[5];
    uint64_t g = ctx->H[6];
    uint64_t h = ctx->H[7];
    uint64_t t1, t2;
    /* Message schedule, kept as a 16-word circular window (indexed & 15). */
    uint64_t w[16];

    /* Load the 16 block words through SWAP64 (presumably a byte swap to the
       big-endian word order the hash is defined on -- confirm in the header). */
#ifdef VECTOR_USAGE
    ulong16  w_vector;
    w_vector = vload16(0, ctx->buffer->mem_64);
    w_vector = SWAP64_V(w_vector);
    vstore16(w_vector, 0, w);
#else
    #pragma unroll
    for (int i = 0; i < 16; i++)
        w[i] = SWAP64(ctx->buffer->mem_64[i]);
#endif

    #pragma unroll
    for (int i = 0; i < 80; i++) {

        /* From round 16 on, the schedule word is derived from earlier words
           and overwrites the slot of the word from 16 rounds ago. */
        if (i > 15) {
            w[i & 15] = sigma1(w[(i - 2) & 15]) + sigma0(w[(i - 15) & 15]) + w[(i - 16) & 15] + w[(i - 7) & 15];
        }
        t1 = k[i] + w[i & 15] + h + Sigma1(e) + Ch(e, f, g);
        t2 = Maj(a, b, c) + Sigma0(a);
        /* Rotate the working variables by one position. */
        h = g;
        g = f;
        f = e;
        e = d + t1;
        d = c;
        c = b;
        b = a;
        a = t1 + t2;
    }
    /* Put checksum in context given as argument. */
    ctx->H[0] += a;
    ctx->H[1] += b;
    ctx->H[2] += c;
    ctx->H[3] += d;
    ctx->H[4] += e;
    ctx->H[5] += f;
    ctx->H[6] += g;
    ctx->H[7] += h;
}

/*
 * Append `len` bytes from `string` to the context's block buffer, starting
 * at the current fill level, and advance the fill level by `len`.
 * No bounds check is done here: the caller has to make sure the bytes fit
 * into what is left of the buffer.
 */
inline void insert_to_buffer(__local sha512_ctx    * ctx,
                      __constant  uint8_t * string,
                      const uint32_t len) {
    /* First free byte of the block buffer. */
    __local uint8_t * tail = ctx->buffer->mem_08 + ctx->buflen;
    int copied = 0;

    while (copied < len) {
        PUTCHAR(tail, copied, GETCHAR(string, copied));
        copied++;
    }

    ctx->buflen += len;
}

/*
 * Feed `len` bytes from `string` into the hash.
 *
 * The bytes are appended to the 128-byte block buffer; every time the buffer
 * becomes full it is compressed with sha512_block() and emptied.  Bytes that
 * do not complete a block stay buffered for the next call.  ctx->total is
 * increased by `len`.
 */
inline void ctx_update(__local sha512_ctx * ctx,
                __constant uint8_t  * string, uint32_t len) {

    uint32_t consumed = 0;                   /* input bytes already handed to the buffer */
    uint32_t room = 128 - ctx->buflen;       /* free bytes in the block being filled */

    ctx->total += len;

    for (;;) {
        const uint32_t remaining = len - consumed;
        const uint32_t take = (remaining <= room) ? remaining : room;

        insert_to_buffer(ctx, string + consumed, take);
        if (ctx->buflen < 128)
            break;                           /* block not complete yet: keep it buffered */

        /* Order the buffer writes before the block is read back. */
        mem_fence(CLK_LOCAL_MEM_FENCE);
        sha512_block(ctx);

        consumed += room;
        ctx->buflen = 0;
        room = 128;                          /* the next block starts empty */
    }

}

/*
 * Write the padding marker byte 0x80 directly after the buffered data and
 * zero-fill the remainder of the 128-byte block buffer.
 *
 * ctx->buflen is not modified; only the local copy `length` is advanced.
 * The zero fill goes byte by byte up to a 4-byte boundary, then uses one
 * 32-bit store if that is needed to reach an 8-byte boundary, then 64-bit
 * stores up to offset 128.
 * NOTE(review): the pointer casts assume ctx->buffer is aligned for 32/64-bit
 * stores -- confirm against the sha512_ctx declaration.
 */
inline void ctx_append_1(__local sha512_ctx * ctx) {

    uint32_t length = ctx->buflen;
    PUTCHAR(ctx->buffer->mem_08, length, 0x80);

    /* Zero single bytes until `length` is a multiple of 4. */
    while (++length & 3)
        PUTCHAR(ctx->buffer->mem_08, length, 0);

    /* Multiple of 4 but not of 8: clear one 32-bit word to get there. */
    if (length & 7) {
        __local uint32_t * l = (__local uint32_t *) (ctx->buffer->mem_08 + length);
        *l = 0;
        length += 4;
    }
    __local uint64_t * l = (__local uint64_t *) (ctx->buffer->mem_08 + length);

    /* Clear the rest of the block eight bytes at a time. */
    while (length < 128) {
        *l++ = 0;
        length += 8;
    }
}

/*
 * Store the message length in bits (ctx->total * 8), passed through SWAP64,
 * in the last 64-bit word of the block buffer.
 * NOTE(review): the multiplication happens before the widening cast, so it
 * is carried out in the type of ctx->total (declared outside this cell).
 */
inline void ctx_add_length(__local sha512_ctx * ctx) {
    const uint64_t bit_count = (uint64_t) (ctx->total * 8);

    ctx->buffer->mem_64[15] = SWAP64(bit_count);
}

/*
 * Turn the partially filled buffer into the final padded block, in place:
 * 0x80 marker plus zero fill, then the bit length in the last 64-bit word.
 * The fill level is reset to 0 afterwards.  The block is NOT compressed
 * here; sha384_digest() calls sha512_block() after this returns.
 * sha384_digest() only takes this path when ctx->buflen <= 111, i.e. when
 * marker and length field fit into the same block as the data.
 */
inline void finish_ctx(__local sha512_ctx * ctx) {
    ctx_append_1(ctx);
    ctx_add_length(ctx);
    ctx->buflen = 0;
}

/*
 * Zero all sixteen 64-bit words of the block buffer and reset the fill
 * level to 0.  The hash state ctx->H and ctx->total are left untouched.
 */
inline void clear_ctx_buffer(__local sha512_ctx * ctx) {

#ifdef VECTOR_USAGE
    /* One 16-wide vector store clears the whole 128-byte buffer. */
    ulong16  w_vector = 0;
    vstore16(w_vector, 0, ctx->buffer->mem_64);
#else
    #pragma unroll
    for (int i = 0; i < 16; i++)
        ctx->buffer->mem_64[i] = 0;
#endif

    ctx->buflen = 0;
}

/*
 * Finish the hash computation and report whether the digest matches.
 *
 * Pads the buffered data, runs the remaining compression step(s) and then
 * writes a single flag byte to *result: 1 when CONDITION (generated into
 * opencl_realman.h) holds for the final state, 0 otherwise.  The digest
 * itself is not copied out.
 */
inline void sha384_digest(__local sha512_ctx * ctx,
                   __global uint8_t   *result) {

    if (ctx->buflen > 111) {
        /* The 0x80 marker plus the length field no longer fit behind the
           data, so an additional block is required for the length. */
        bool moved = true;

        if (ctx->buflen < 128) {
            /* The marker still fits into the current block. */
            ctx_append_1(ctx);
            moved = false;
        }
        sha512_block(ctx);
        clear_ctx_buffer(ctx);

        /* Marker goes first in the fresh block; everything else is zero. */
        if (moved)
            PUTCHAR(ctx->buffer->mem_08, 0, 0x80);
        ctx_add_length(ctx);
    } else {
        /* Data, marker and length all fit into one 1024-bit block. */
        finish_ctx(ctx);
    }
    sha512_block(ctx);

    /* Only the match flag leaves the kernel, never the digest words. */
    *result = (CONDITION) ? 1 : 0;
}

/*
 * Hash the template variant selected by `x` and store the match flag in
 * *output (1 = digest satisfies CONDITION, 0 = it does not).
 *
 * UPDATE is a macro generated into opencl_realman.h.  It expands to a
 * sequence of ctx_update() calls that refer to the identifiers `ctx` and `x`
 * by name, so these two parameters must keep exactly these names.
 */
inline void sha384crypt(__local  sha512_ctx * ctx,
                 __global uint8_t *output, uint64_t x) {    
    init_ctx384(ctx);
    UPDATE; 
    sha384_digest(ctx, output);
}

/*
 * Kernel entry point: every work item tests one candidate.
 *
 *   base        candidate number assigned to global id 0
 *   out_buffer  one flag byte per work item, indexed by global id
 *   ctx_memory  local-memory scratch area, one sha512_ctx per work item of
 *               the work group, indexed by local id
 */
__kernel
void kernel_sha384(uint64_t base, __global   uint8_t * out_buffer,
                  __local sha512_ctx  * ctx_memory) {
    const size_t global_index = get_global_id(0);
    __local sha512_ctx * own_ctx = ctx_memory + get_local_id(0);
    const uint64_t candidate = base + global_index;

    sha384crypt(own_ctx, out_buffer + global_index, candidate);
}
In [26]:
from time import sleep
step = 10**7
t0 = time()
expected_number = 256**4
L=len(sp)
print "L=", L
stdout.flush()
text = TextWidget()
display(text)

for i in xrange(0,2**L, step):
    output=cl.array.zeros(queue, step, dtype=np.uint8)
    working = cl.LocalMemory(200*128)
    kernel_sha384(queue, (step,), (128,), np.uint64(i), output.data, working).wait()
    result = output.get()
    for j in np.where(result==1)[0]:        
        x =i+j
        s=s0.replace('X', '%s')%tuple(' '*((x>>i)&1) for i in range(L))
        print x,repr(sha384(s).digest())
        with open("good/%d.c"%x, "w") as f:
            f.write(s)
        stdout.flush()    
    elapsed = (time()-t0)/60
    percent = i*100.0/expected_number
    exp =  0 if i==0 else 100.0*elapsed/percent/60.0
    text.value = "{} {:5.2f}% {:.2f}min EXP={:.2f}hrs".format(i, percent, elapsed, exp)
    sleep(2)
print "done"
L= 39
1580306764 '. *\n"@\xc1\xe4\xa0X\xc4"\x13\xd08i\xb9\x88\xcd\xbb\x15\xbcl\xf2\xe7\xbdX\xedX\xc6\x1e\xf0>B\xdf\xf4\xb7\x00\xf9\xb0\x9f\xcbLr\xdd}\xfc\x81'
7385759359 ". *\n8\xff3`\x06'd\xc0\xee\xf4\x9b\xd3\xbaA\xa83\xb8\xb9\x96\xc9\x8c\x1e\xd4\xc1\x13\xfd\xfdO\x00\x1a9('\x90\x04\x04(<b*^*xe"
10346520310 '. *\ni\xab\x17\x1dQn\xe8t\xdbr\xb7,\xff\x9f\xbf\x9e\x9e\x8fN\xb8d\xe3\xec\x06^^\xf6V\xf6i\x98\xe8\xe0\xc8\x933\x98\xe7\xb9\xcf\xf4\t\x04\x82'
15535172170 '. *\n\x02\xd5[\xab\x0f-\xfa4\x14+\xfeL\xfdm\xaee>ExB\xae\xc4\x99\xd53\r\x80{.\xb3\xc3\x95+F\xb7\tJ\x0c\xdf\xaaL\ns\xb2'
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-26-73032072ccc5> in <module>()
     12     output=cl.array.zeros(queue, step, dtype=np.uint8)
     13     working = cl.LocalMemory(200*128)
---> 14     kernel_sha384(queue, (step,), (128,), np.uint64(i), output.data, working).wait()
     15     result = output.get()
     16     for j in np.where(result==1)[0]:

KeyboardInterrupt: 
In [ ]: