#!/usr/bin/env python
# coding: utf-8

# I came across [the "Human RNG" blog post](https://torvaney.github.io/projects/human-rng) today. Its author analyzes the distribution of random numbers given by 8500 students and proposes the following algorithm for generating approximately random numbers when querying humans for random numbers:
#
# ```
# - Ask a person for a random number, n1.
#   - n1=1,2,3,4,6,9, or 10:
#     - Your random number is n1
#   - If n1=5:
#     - Ask another person for a random number (n2)
#       - If n2=5 (12.2%):
#         - Your random number is 2
#       - If n2=10 (1.9%):
#         - Your random number is 4
#       - Else, your random number is 5
#   - If n1=7:
#     - Ask another person for a random number (n2)
#       - If n2=2 or 5 (20.7%):
#         - Your random number is 1
#       - If n2=8 or 9 (16.2%):
#         - Your random number is 9
#       - If n2=7 (28.1%):
#         - Your random number is 10
#       - Else, your random number is 7
#   - If n1=8:
#     - Ask another person for a random number (n2)
#       - If n2=2 (8.5%):
#         - Your random number is 1
#       - Else, your random number is 8
# ```
# In this blog post, I propose to use this algorithm and verify that it works as advertised by the author.
#
# To do this, we will first generate random numbers according to how humans generate random numbers (using our computer). Then, we will implement the algorithm and see if it yields a uniform distribution of random numbers between 1 and 10.

# # Generating random numbers like a human, using a computer

# Let's start by downloading the same data as the blog author and extracting the counts for each number.
# In[1]:


import pandas as pd


def load_data(url: str = "https://git.io/fjoZ2") -> pd.Series:
    """Load the survey answers and return the share of each number from 1 to 10.

    Args:
        url: Location of the CSV file. It must contain a
            ``pick_a_random_number_from_1_10`` column.

    Returns:
        A Series indexed by the answered number (ascending) whose values are
        the percentage of valid answers that picked this number.
    """
    df = pd.read_csv(url)
    answers = df.pick_a_random_number_from_1_10.dropna().map(int)
    # Discard answers that fall outside the requested 1-10 range.
    answers = answers[(answers >= 1) & (answers <= 10)]
    counts = answers.value_counts().sort_index()
    return counts / counts.sum() * 100


# In[2]:


import hvplot.pandas  # noqa: F401  (imported for the `.hvplot` accessor used below)

percentage_counts = load_data()
percentage_counts.hvplot.bar(width=500)


# Let's use `scipy.stats` to sample random numbers from the above arbitrary distribution.
#
# We first build a discrete distribution object.

# In[3]:


from scipy.stats import rv_discrete

# xk: the possible outcomes, pk: their probabilities (normalized to sum to 1).
xk = percentage_counts.index.values
pk = percentage_counts.values / percentage_counts.values.sum()
human_rn_distribution = rv_discrete(values=(xk, pk))


# And then we can sample from this distribution:

# In[4]:


human_rn_distribution.rvs(size=100)


# We can now define a `human_RNG` function that returns a single number that follows the above number distribution:

# In[5]:


def human_RNG() -> int:
    """Return a single number drawn from the empirical human distribution."""
    # `rvs(size=1)` returns a length-1 array. Pick its element before calling
    # int(): converting an array with ndim > 0 to a scalar is deprecated
    # since NumPy 1.25.
    return int(human_rn_distribution.rvs(size=1)[0])


human_RNG()


# To verify that this works correctly, we can sample a large number of values and check that our constructed histogram is similar to the data we started with.

# In[6]:


(
    pd.Series([human_RNG() for _ in range(10000)], name='counts')
    .value_counts()
    .sort_index()
    .hvplot.bar(width=500)
)


# This looks like the original distribution, so we can move on and implement the human random generator rule.

# # Implementing the human RNG rule

# The rule stated above can be transcribed in the following function:

# In[7]:


# Corrections applied after a second draw: for each first draw n1 that the
# rule rebalances, map the second draw n2 to the number returned instead.
# Any n2 that is not listed keeps n1.
_SECOND_DRAW_RULES = {
    5: {5: 2, 10: 4},
    7: {2: 1, 5: 1, 8: 9, 9: 9, 7: 10},
    8: {2: 1},
}


def random_number_1_10() -> int:
    """Generate a random number from numbers produced by `human_RNG`.

    A first number n1 is drawn. If the rule lists no correction for n1
    (1, 2, 3, 4, 6, 9 and 10), n1 is returned as is. Otherwise (5, 7 and 8)
    a second number n2 is drawn and the result is either the replacement
    listed for n2 or, when n2 is not listed, n1 itself.
    """
    n1 = human_RNG()
    corrections = _SECOND_DRAW_RULES.get(n1)
    if corrections is None:
        # No second draw is needed for this number.
        return n1
    n2 = human_RNG()
    return corrections.get(n2, n1)


random_number_1_10()


# Now the interesting part: we evaluate the procedure by drawing a large number of values from it and will (hopefully) find a flat distribution of numbers between 1 and 10.

# In[8]:


(
    pd.Series([random_number_1_10() for _ in range(50000)], name='counts')
    .value_counts()
    .sort_index()
    .hvplot.bar(width=500)
)


# Neat. We find something that is almost flat. So the advertised procedure works as announced by the author.
#
# This is good to know if you ever run out of pseudo random number generators and computers ^^.

# *This post was entirely written using the Jupyter Notebook. Its content is BSD-licensed. You can see a static view or download this notebook with the help of nbviewer at [20190703_HumanRandomNumbers.ipynb](http://nbviewer.ipython.org/urls/raw.github.com/flothesof/posts/master/20190703_HumanRandomNumbers.ipynb).*