table = zeros((12, 12), dtype=int16)
for i in range(12):
    for j in range(12):
        table[i, j] = (i + 1) * (j + 1)
print table
[[  1   2   3   4   5   6   7   8   9  10  11  12]
 [  2   4   6   8  10  12  14  16  18  20  22  24]
 [  3   6   9  12  15  18  21  24  27  30  33  36]
 [  4   8  12  16  20  24  28  32  36  40  44  48]
 [  5  10  15  20  25  30  35  40  45  50  55  60]
 [  6  12  18  24  30  36  42  48  54  60  66  72]
 [  7  14  21  28  35  42  49  56  63  70  77  84]
 [  8  16  24  32  40  48  56  64  72  80  88  96]
 [  9  18  27  36  45  54  63  72  81  90  99 108]
 [ 10  20  30  40  50  60  70  80  90 100 110 120]
 [ 11  22  33  44  55  66  77  88  99 110 121 132]
 [ 12  24  36  48  60  72  84  96 108 120 132 144]]
The example approximation problem: 7203 * 6892.
print 7203 * 6892
print 7000 * 7000
print (7000 * 7000 - 7203 * 6892) / float(7203 * 6892) * 100
49643076
49000000
-1.29539918115
If I knew my 72 times table, I could have used 7200 * 6900 instead, since that's just 72 * 69 * 10^4.
print 7203 * 6892
print 7200 * 6900
print (7200 * 6900 - 7203 * 6892) / float(7203 * 6892) * 100
49643076
49680000
0.074378952666
Let's build an approximation function that takes into account the lead digit we want to use. This is just a proof of concept.
def approximate(number, lead):
    return lead * 10 ** (round(log10(number / lead)))
approximate(12345, 9)
9000.0
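To see where the 9000 comes from: $12345 / 9 \approx 1372$, and $\log_{10}(1372) \approx 3.14$ rounds to 3, so the function returns $9 \times 10^3 = 9000$.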
Now, let's do the same thing but with a list of lead digits.
def approximate(number, possible_leads):
    approx = array([lead * 10 ** (round(log10(number / lead))) for lead in possible_leads])
    return approx[abs(number - approx).argmin()]
approximate(18345, [1, 2, 3, 4])
20000.0
The approximate product is just the product of the approximations.
def approximate_product(a, b, possible_leads):
    return approximate(a, possible_leads) * approximate(b, possible_leads)
From there follows the relative error.
def relative_error(a, b, possible_leads):
    return abs(approximate_product(a, b, possible_leads) / (a * b) - 1)
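In other words, writing $\hat{x}$ for the chosen approximation of $x$, the quantity computed above is $\epsilon(a, b) = \left| \frac{\hat{a}\,\hat{b}}{a\,b} - 1 \right|$.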
relative_error(545, 999, range(1, 11))
0.081650457797246778
relative_error(549, 999, range(1, 11))
0.088341529142986319
On to more serious things: sample random pairs of numbers and compute the mean relative error, comparing the 10 times table with the 12 times table.
def mean_error_uniform(max_lead_digit, N=100000):
    return mean([relative_error(randint(1, 1e6), randint(1, 1e6), range(1, max_lead_digit + 1)) for i in xrange(N)])
mean_error_uniform(10)
0.093972687794082327
mean_error_uniform(10, 10000)
0.095496434040729439
mean_error_uniform(12)
0.081577214232046474
mean_errors = [mean_error_uniform(i, 10000) for i in range(2, 21)]
plot(range(2, 21), mean_errors)
ylim((0, 0.5))
grid(True)
I notice that the routine is quite slow. Maybe I could make it faster with some fancy NumPy trick?
approximate(ones((10, 1)), range(1, 4))
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-75-41cdb1b69303> in <module>()
----> 1 approximate(ones((10, 1)), range(1, 4))

<ipython-input-30-95979ec72590> in approximate(number, possible_leads)
      1 def approximate(number, possible_leads):
----> 2     approx = array([lead * 10 ** (round(log10(number / lead))) for lead in possible_leads])
      3     return approx[abs(number - approx).argmin()]

TypeError: only length-1 arrays can be converted to Python scalars
No, it doesn't work as is: the built-in round chokes on arrays, so the function doesn't broadcast. A vectorized rewrite is sketched below for reference, but here let's just use fewer trials in the functions...
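Something along these lines might work, swapping the built-in round for NumPy's elementwise around and broadcasting over a whole array of numbers at once; the name approximate_vec is mine, and this sketch isn't used in the rest of the post.
def approximate_vec(numbers, possible_leads):
    # one row per number, one column per candidate lead
    numbers = asarray(numbers, dtype=float)
    leads = asarray(possible_leads, dtype=float)
    candidates = leads * 10 ** around(log10(numbers[:, None] / leads))
    # for each number, keep the candidate closest to it
    best = abs(numbers[:, None] - candidates).argmin(axis=1)
    return candidates[arange(len(numbers)), best]
approximate_vec([18345], [1, 2, 3, 4]) should reproduce the 20000.0 from above.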
mean_errors = [mean_error_uniform(i, N=10000) for i in range(2, 51)]
plot(range(2, 51), mean_errors)
ylim((0, 0.5))
grid(True)
Let's display the relative improvement for each value.
mean_errors = array(mean_errors)
plot(range(2, 50), (mean_errors[:-1] - mean_errors[1:]) / mean_errors[:-1])
ylim((0, 0.6))
grid(True)
Next, the relative improvement per extra multiplication fact memorized: going from the $(n-1)$ times table to the $n$ times table means learning $n^2 - (n-1)^2 = 2n - 1$ new products, which is the denominator used below.
rng = arange(2, 50)
denom = map(lambda n: n ** 2 - (n - 1) ** 2, rng)
plot(rng, (mean_errors[:-1] - mean_errors[1:]) / mean_errors[:-1] / (denom))
ylim((0, 0.05))
grid(True)
Now let's simulate with Benford's law: for a leading digit $d \in \{1, \dots, 9\}$, $P(d) = \log_{10}\left(1 + \frac{1}{d}\right)$. Steps to follow: compute the cumulative distribution $G$, draw a uniform random number $r$, and return the first digit whose cumulative probability exceeds $r$.
probs = log10(1 + 1/arange(1, 10, dtype=float32))
G = cumsum(probs)
G
array([ 0.30103001, 0.47712126, 0.60206002, 0.69897002, 0.77815127, 0.84509802, 0.90309 , 0.95424253, 1. ], dtype=float32)
r = rand(1)
r
array([ 0.12225127])
(r < G).argmax()
0
probs = log10(1 + 1/arange(1, 10, dtype=float32))
G = cumsum(probs)
def random_benford_number():
    return (rand(1) < G).argmax() + 1
random_benford_number()
2
benford_numbers = array([random_benford_number() for i in range(1000000)])
bar(arange(1, 10), [count_nonzero(benford_numbers == i) for i in arange(1, 10)], label='Empirical')
# expected counts under Benford's law: N * P(d)
plot(arange(1, 10), len(benford_numbers) * log10(1 + 1/arange(1, 10, dtype=float32)), 'or', label="Benford's law")
legend()
<matplotlib.legend.Legend at 0xb3558f0>
%timeit random_benford_number()
100000 loops, best of 3: 6.44 us per loop
%timeit randint(1, 1e6)
10000000 loops, best of 3: 114 ns per loop
6.44 / 0.114
56.49122807017544
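So the Benford generator is roughly 56 times slower than randint, mostly because it draws one digit per call. If that ever mattered, the draws could presumably be vectorized with searchsorted against the cumulative distribution G; this is only a sketch of mine and isn't used below.
def random_benford_digits(n):
    # draw n Benford-distributed leading digits in one shot by inverting G
    return searchsorted(G, rand(n)) + 1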
def random_integer_benford(n):
    r = randint(1, n)
    # use the uniform draw only to pick a number of digits, then draw each digit from Benford's law
    return float("".join([str(random_benford_number()) for digit in str(r)]))
random_integer_benford(1000)
814.0
Finally, we can rerun the analysis with these more "realistic" numbers.
def mean_error_benford(max_lead_digit, N=100000):
    return mean([relative_error(random_integer_benford(1e6), random_integer_benford(1e6), range(1, max_lead_digit + 1)) for i in xrange(N)])
mean_error_benford(10, N=10000)
0.14795717887345317
mean_errors_benford = [mean_error_benford(i, N=10000) for i in range(2, 21)]
mean_errors_benford = array(mean_errors_benford)
rng = arange(2, 20)
plot(rng + 1, (mean_errors_benford[:-1] - mean_errors_benford[1:]) / mean_errors_benford[:-1])
grid(True)
ylim((0, 0.5))
(0, 0.5)
rng = arange(2, 20)
denom = map(lambda n: n ** 2 - (n - 1) ** 2, rng)
plot(rng, (mean_errors_benford[:-1] - mean_errors_benford[1:]) / mean_errors_benford[:-1] / (denom))
ylim((0, 0.05))
grid(True)
The last result is quite different from what is shown at the original link, but I'll stop here. My conclusions are as follows: