MalmöMilen start group distribution

The registration form ask participants to estimate their time in order to create groups.

Here is a visualization of the number of people per start group.

In [2]:
from bs4 import BeautifulSoup
import requests
import json

url = 'http://www.malmomilen.se/anmaelan/startlista-2014'
page = requests.get(url)
soup = BeautifulSoup(page.text)
table = soup.body.table.find_all('tr')

ps = {}
for line in table:
    c = line.find_all('td')
    time = c[4].text if c[4].text != '' else 'N/A'
    if time not in ps:
        ps[time] = 0
    ps[time] += 1

print("There are " + str(len(table)) + " participants")
There are 7080 participants
In [4]:
% matplotlib inline

import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np

sorted_groups = ['Elitgruppen', 'Under 42 minuter', 'Under 45 minuter', '45-50 minuter', 
                 '50-55 minuter', '55-60 minuter', '60+ minuter', 'Barnloppet']
groups = [g[:8] for g in sorted_groups]
y_pos = np.arange(len(groups))[::-1]
n = [ps[group] for group in groups]

rects = plt.barh(y_pos, n, align='center', alpha=0.5)
plt.yticks(y_pos, sorted_groups)
plt.xlabel('Number of participants')
plt.title('Start group distribution')

for i, rect in enumerate(rects):
    plt.text(0.95 * rect.get_width(), rect.get_y() + rect.get_height() / 2.0, ps[groups[i]], ha='right', va='center')

plt.show()