%pylab inline from IPython.display import Image display(Image('pdf_text.png')) # First, let's get the PDF file contents as a byte stream data = open('puzzle.pdf', 'rb').read() import zlib decompressed = zlib.decompress(data[78:(78+324)]) print(''.join(chr(x) for x in decompressed)) lines = ''.join(chr(x) for x in decompressed).split('\n') elements = [l.split(' ') for l in lines] x = [] y = [] for row in elements[:-3]: x.append(row[-2]) y.append(row[-1]) plot(x, y) axis('equal') import re matches = list(re.finditer(b'stream\n((.|\n)*?)\nendstream', data)) for m in matches: decompressed = zlib.decompress(m.group(1)) print('============ MATCH =============') print(''.join(chr(x) for x in decompressed[:2000])) m = matches[1] decompressed = zlib.decompress(m.group(1)) print(''.join(chr(x) for x in decompressed[:20000])) lines = ''.join(chr(x) for x in decompressed).split('\n') print(len(lines), len(lines[0])) imdata = np.array(bytearray(decompressed), dtype=np.uint8) print(len(imdata)) for m in range(100,500): if mod(len(imdata), m) == 0: n = len(imdata) // m figure() title((m, n)) imshow(imdata.reshape((m, len(imdata) // m))) figure(figsize=(10,10)) imshow(np.repeat(imdata.reshape((126,438,1)),3, axis=2)) figure(figsize=(10,10)) imshow(np.repeat(imdata.reshape((126,438,1)),3, axis=2), extent=(0,146,0,126))