Exploring the face data

This notebook plays around with the data generated by running the facial detection script over the whole Tribune collection.

In [1]:
import pandas as pd
import altair as alt
In [2]:
# Load the face-detection results into a DataFrame.
df = pd.read_csv(filepath_or_buffer='faces_per_image.csv')
In [3]:
# Summary statistics for the numeric columns (here, the per-image face count).
df.describe()
Out[3]:
faces
count 60364.000000
mean 3.821433
std 6.661479
min 0.000000
25% 1.000000
50% 2.000000
75% 4.000000
max 174.000000
In [4]:
# Some images report more than 100 detected faces -- list them so they can
# be inspected individually.
df.loc[df['faces'].gt(100)]
Out[4]:
faces image
20402 174 FL4470426.jpg
28301 117 FL4494940.jpg
34768 145 FL4517063.jpg
36351 111 FL4522209.jpg
36353 105 FL4522212.jpg
37843 109 FL4526927.jpg
49708 127 FL4564048.jpg
50428 104 FL4566376.jpg
53969 132 FL4577551.jpg
54041 110 FL4577772.jpg
54044 144 FL4577775.jpg
54062 112 FL4577793.jpg
54172 126 FL4578163.jpg
54173 111 FL4578164.jpg
In [9]:
# Use Altair's JSON data transformer so the chart references its data from a
# sidecar JSON file rather than embedding every row in the notebook. The
# dataset has ~60k rows, which is above Altair's default row limit for
# embedded data.
alt.data_transformers.enable('json')

# Histogram of the number of faces detected per image. Explicit axis and
# chart titles let the figure stand on its own when the notebook is skimmed.
alt.Chart(df).mark_bar().encode(
    x=alt.X(
        'faces:Q',
        bin=alt.BinParams(maxbins=100),
        title='Faces detected per image'
    ),
    y=alt.Y('count():Q', title='Number of images')
).properties(
    title='Distribution of detected faces per image'
)
Out[9]:
In [9]:
# Sum of the per-image face counts across the whole dataset.
df.faces.sum()
Out[9]:
230677
In [ ]: