#!/usr/bin/env python
# coding: utf-8

# # Generate bitcoin addresses for Irving & Holden's 2016 clinical trial word document
#
# ### By [Daniel Himmelstein](http://dhimmel.com/) ([@dhimmel](https://twitter.com/dhimmel))
#
# This notebook computes the bitcoin addresses for the [word document](https://f1000researchdata.s3.amazonaws.com/datasets/8114/9c9f9a18-a852-40c6-953e-c75107abc714_Appendix_1_-_unformatted_text_file_.docx) from the following study:
#
# > **How blockchain-timestamped protocols could improve the trustworthiness of medical science** [[version 2; referees: 3 approved]](https://doi.org/b2pt)
# Greg Irving, John Holden
# _F1000Research_ (2016) DOI: [10.12688/f1000research.8114.2](https://doi.org/10.12688/f1000research.8114.2)
#
# It uses the method described by Benjamin Gregory Carlisle in a 2014 blog post titled [Proof of prespecified endpoints in medical research with the bitcoin blockchain](http://www.bgcarlisle.com/blog/2014/08/25/proof-of-prespecified-endpoints-in-medical-research-with-the-bitcoin-blockchain/).
#
# **Warning: the Carlisle method is not the recommended approach for _proof of existence_ using Bitcoin. This notebook is not an endorsement of the method, but rather a demonstration that the address generation in the Irving & Holden study is flawed.**

# ## Dependencies
#
# This is a Python 3 notebook. It requires [Python Bitcoin Tools](https://github.com/vbuterin/pybitcointools), which can be installed with `pip install bitcoin`. This notebook was generated using `bitcoin==1.1.42` from [PyPI](https://pypi.python.org/pypi/bitcoin/1.1.42).

# In[1]:

from urllib.request import urlopen
import hashlib

import bitcoin

# ## Generate the private key
#
# Get the sha256 hash for [Dataset 1. Unformatted text file.](https://doi.org/10.5256/f1000research.8114.d114596 "Irving & Holden. F1000 Research. Dataset 1. Unformatted text file.")

# In[2]:

url = 'https://f1000researchdata.s3.amazonaws.com/datasets/8114/9c9f9a18-a852-40c6-953e-c75107abc714_Appendix_1_-_unformatted_text_file_.docx'
# Download the raw document bytes; the `with` block closes the HTTP
# response as soon as the body has been read.
with urlopen(url) as response:
    data = response.read()
# The hex-encoded SHA-256 digest of the document is used directly as the
# private key.
checksum = hashlib.sha256(data)
private_key = checksum.hexdigest()
private_key

# In[3]:

# Get the private key's format
bitcoin.get_privkey_format(private_key)

# ## Generate the corresponding public keys
#
# There are two common types of bitcoin public keys (compressed and uncompressed) that result in different bitcoin addresses. Neither Carlisle nor Irving & Holden report which type of public key they use, so we'll try both.
# In[4]:

# Uncompressed public key
public_key = bitcoin.privkey_to_pubkey(private_key)
public_key

# In[5]:

# Compressed public key
public_key_compressed = bitcoin.compress(public_key)
public_key_compressed

# ## Generate the corresponding addresses
#
# Note that neither address matches the address reported by Irving & Holden, which was [`1AHjCz2oEUTH8js4S8vViC8NKph4zCACXH`](https://blockchain.info/address/1AHjCz2oEUTH8js4S8vViC8NKph4zCACXH).

# In[6]:

# Uncompressed address
address = bitcoin.pubkey_to_address(public_key)
address

# In[7]:

# Compressed address
address_compressed = bitcoin.pubkey_to_address(public_key_compressed)
address_compressed

# In[8]:

# Check whether the Irving & Holden address is wrong
address_irving = '1AHjCz2oEUTH8js4S8vViC8NKph4zCACXH'
if address_irving not in {address, address_compressed}:
    print('Irving & Holden have a big problem.')

# ## Check whether either of the correct addresses has ever been used
#
# As of March 6, 2017, neither address has been used.

# In[9]:

# URLs for blockchain.info address details
# The loop uses its own names so that `address` keeps the uncompressed
# address and `url` keeps the dataset URL after the loop finishes.
for candidate_address in address, address_compressed:
    address_url = 'https://blockchain.info/address/{}'.format(candidate_address)
    print(address_url)

# ## Alternative implementation
#
# For an alternative implementation, you can generate the sha256 checksum via the unix shell:
#
# ```sh
# URL=https://f1000researchdata.s3.amazonaws.com/datasets/8114/9c9f9a18-a852-40c6-953e-c75107abc714_Appendix_1_-_unformatted_text_file_.docx
# curl --silent $URL | shasum --algorithm 256
# ```
#
# Then you can use [bitaddress.org](https://www.bitaddress.org) to generate the bitcoin addresses. Just go to the "Wallet Details" page and paste the sha256 hash into the "Enter Private Key" field. This approach generates the same addresses as this notebook.

# ## Plain text hashes
#
# Since [Xorbin](http://www.xorbin.com/tools/sha256-hash-calculator) appears to only support hashing of pasted text rather than an uploaded file, it's likely Irving & Holden pasted the word document contents into Xorbin.
# It's difficult to recreate exactly how the formatted word document was converted to plain text. Below we compute addresses for one possible plain text representation.

# In[10]:

# See carlisle.py for the source of the carlisle_method function
# that implements the address generation logic above.
from carlisle import carlisle_method

# In[11]:

# This data was produced by selecting all from the word document, copying,
# and pasting on macOS 10.12.3 using Microsoft Word for Mac 2011 Version 14.0.0
# It's entirely possible the version below has already been corrupted due to automated
# newline encoding conversions.
# NOTE(review): the line breaks inside this literal were reconstructed at
# apparent field boundaries. The resulting hash depends on the exact bytes
# (including any blank lines), so verify against the original notebook
# before relying on the addresses derived from it.
data = b'''\
Study Type: Interventional
Study Design: Allocation: Randomized
Endpoint Classification: Safety/Efficacy Study
Intervention Model: Parallel Assignment
Masking: Open Label
Primary Purpose: Prevention
Official Title: Cardiovascular and Metabolic Effects of Moderate Alcohol Consumption in Type 2 Diabetes
Further study details as provided by Ben-Gurion University of the Negev:
Primary Outcome Measures:
Glycemic control [ Time Frame: 6 months ] [ Designated as safety issue: Yes ]
Secondary Outcome Measures:
CVD status [ Time Frame: 6 months ] [ Designated as safety issue: Yes ]
'''

# In[12]:

carlisle_method(data, compress=False)

# In[13]:

carlisle_method(data, compress=True)

# ## Update for protocol for manuscript version 3
#
# On March 30, 2017, Irving & Holden posted [version 3](https://doi.org/10.12688/f1000research.8114.3) of their study to _F1000Research_. This version contains a new "[Dataset 1.Unformatted text file](https://doi.org/10.5256/f1000research.8114.d156051)", which is a text document rather than word document. Below we find the hash and addresses for this text.

# In[14]:

url = 'https://f1000researchdata.s3.amazonaws.com/datasets/8114/da88d341-eeed-4630-b120-78e9ff8a9d38_CASCADE.txt'
# Download the raw text bytes; the `with` block closes the HTTP response
# as soon as the body has been read.
with urlopen(url) as response:
    data = response.read()
data

# In[15]:

carlisle_method(data, compress=False)

# In[16]:

carlisle_method(data, compress=True)