Here are fancy plots.
import datetime
# IMPORTANT DATES
# Rating objects only carry a date attribute from April 9, 2016 onwards.
ratings_have_date = datetime.date(year=2016, month=4, day=9)
# First day of 2017, used as the left bound of the "From 2017" plots.
year_2017 = datetime.date(year=2017, month=1, day=1)
We can directly use Django's ORM in order to compute the number of objects created per day.
from django.contrib.auth.models import User
from collections import Counter
# Tally, per calendar day, the users whose date_joined falls on that day.
nb_users_per_day = Counter(
    joined.date()
    for joined in User.objects.values_list('date_joined', flat=True)
)
# Same tally for ratings, keyed on the day part of each rating's date field.
nb_ratings_per_day = Counter(
    rated.date()
    for rated in Rating.objects.values_list('date', flat=True)
)
We say a work is new when it has been added to someone's ratings for the first time.
# For each day, count the works that received their very first rating on it.
nb_new_works_over_time = Counter()
already_rated = set()  # ids of the works encountered so far
# Walk the ratings chronologically: without an explicit ordering the database
# is free to return rows in any order, and a work's "first" rating could then
# be attributed to a later day than the one it really appeared on.
for work_id, dt in Rating.objects.order_by('date').values_list('work_id', 'date'):
    if work_id not in already_rated:
        day = dt.date()
        already_rated.add(work_id)
        nb_new_works_over_time[day] += 1
from bisect import bisect_left

# Every day on which at least one user joined or one rating is dated,
# in chronological order (days with no activity at all are absent).
x = sorted(nb_users_per_day.keys() | nb_ratings_per_day.keys())
# Useful indexes for plotting
# bisect_left gives the same position as x.index() when the date is in x, but
# instead of raising ValueError when nothing happened on that exact day it
# returns the position of the first later day, which is still a valid slice
# start for the plots.
i_rw = bisect_left(x, ratings_have_date)
i_2017 = bisect_left(x, year_2017)
import numpy as np
# Daily counts aligned on the days listed in x (a Counter yields 0 for a day
# it has no entry for), each followed by its running total.
y_u = [nb_users_per_day[d] for d in x]
y_uc = np.cumsum(y_u)

y_r = [nb_ratings_per_day[d] for d in x]
y_rc = np.cumsum(y_r)

y_w = [nb_new_works_over_time[d] for d in x]
y_wc = np.cumsum(y_w)
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
# Column titles and row labels of the 3x3 grid of plots.
cols = ['Every day from the beginning', 'Cumulative', 'From 2017']
rows = ['Users', 'Ratings', 'Works']

# sharex='col': the x-axis is shared by the subplots of a same column.
fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(16, 10), sharex='col')
users, users_cumul, users2017 = axes[0]
ratings, ratings_cumul, ratings2017 = axes[1]
works, works_cumul, works2017 = axes[2]

# Abbreviated month + two-digit year tick labels, set on the top-row axes.
for top_ax in (users, users_cumul, users2017):
    top_ax.xaxis.set_major_formatter(DateFormatter('%b %y'))
# Axes.plot_date is deprecated since Matplotlib 3.9; Axes.plot accepts date
# x-values directly, so it is used instead with the same format strings.
# Columns: daily counts, cumulative counts, cumulative counts from 2017 on.

# Users: the whole history is plotted.
users.plot(x, y_u, 'orangered')
users_cumul.plot(x, y_uc, '-')
users2017.plot(x[i_2017:], y_uc[i_2017:], '-')

# Ratings: the daily series starts one entry after i_rw while the cumulative
# one starts at i_rw.
# NOTE(review): presumably this skips the day on which all previously undated
# ratings were stamped -- confirm.
ratings.plot(x[i_rw+1:], y_r[i_rw+1:], 'orangered')
ratings_cumul.plot(x[i_rw:], y_rc[i_rw:], '-')
ratings2017.plot(x[i_2017:], y_rc[i_2017:], '-')

# Works: same slicing as for the ratings, since new works are derived from them.
works.plot(x[i_rw+1:], y_w[i_rw+1:], 'orangered')
works_cumul.plot(x[i_rw:], y_wc[i_rw:], '-')
works2017.plot(x[i_2017:], y_wc[i_2017:], '-')
# fig.autofmt_xdate() only displays labels at the bottom, so the tick labels
# of every subplot are made visible and tilted one by one instead.
for ax in axes.flat:
    for tk in ax.get_xticklabels():
        tk.set_rotation(30)
        tk.set_visible(True)

# Row labels go on the first column, column titles on the first row.
for ax, row in zip(axes[:, 0], rows):
    ax.set_ylabel(row, size='large')
for ax, col in zip(axes[0], cols):
    ax.set_title(col)

plt.suptitle('Mangaki Fancy Stats')
fig.tight_layout()
# Adjust the top margin after tight_layout so the figure title has room.
fig.subplots_adjust(top=0.92)
plt.show()
Since the end of April 2017, we seem to have many more works in the database. How come?
Answer: this PR from @RaitoBezarius, which fixes MyAnimeList imports.