All imports.
%matplotlib inline
from __future__ import division
import logging, sys, random
from time import time
from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.decomposition import PCA
from sklearn.decomposition import TruncatedSVD
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Normalizer
from sklearn.cross_validation import cross_val_score
from sklearn import metrics
from sklearn.metrics import pairwise_distances
import numpy as np
from scipy.stats import mode
import matplotlib.pyplot as plt, mpld3
from mpld3 import plugins
# mpld3.enable_notebook()
def find_movie(os_id):
    """Return the entries of the global ``movies`` whose 'osID' matches.

    ``os_id`` is converted with ``str`` before comparison, so it may be given
    as an int or a string. All matching entries are returned as a list, in
    the order they occur in ``movies``; the list is empty if nothing matches.
    """
    wanted = str(os_id)
    return [movie for movie in movies if movie['osID'] == wanted]
def make_histogram(innerDict):
    """Show a bar chart with one bar per entry of ``innerDict``.

    Bars sit at positions 0..len(innerDict)-1 with the dictionary values as
    heights; the x tick labels are the dictionary keys, rotated 70 degrees.
    The figure is displayed via ``plt.show()`` and nothing is returned.
    """
    tick_labels = innerDict.keys()
    bar_heights = innerDict.values()
    positions = np.arange(len(tick_labels))

    figure = plt.figure(figsize=(20, 10))
    axes = figure.add_subplot(1, 1, 1)
    axes.bar(positions, bar_heights)
    axes.set_xticks(positions)
    axes.set_xticklabels(tick_labels, rotation=70)
    plt.show()
Load Movie Data
from load import movies
movies = np.array(movies)
print "loaded data", len(movies)
genres = set()
for movie in movies:
for genre in movie.get('Genre', []):
genres.add(genre)
genres = list(genres)
print genres
loaded data 6253 [u'Sci-Fi', u'Crime', u'Romance', u'Animation', u'Music', u'Adult', u'Comedy', u'War', u'Horror', u'Film-Noir', u'Adventure', u'News', u'Thriller', u'Western', u'Mystery', u'Short', u'N/A', u'Drama', u'Action', u'Documentary', u'Musical', u'History', u'Family', u'Fantasy', u'Sport', u'Biography']
# Clustering configuration.
N_CLUSTERS = 5
reduce_dimensionality = True

# TF-IDF features built from each movie's script. With float arguments,
# min_df / max_df are document-frequency proportions (see the scikit-learn
# TfidfVectorizer documentation); English stop words are removed.
text = [movie['script'] for movie in movies]
vectorizer = TfidfVectorizer(max_df=0.5, min_df=0.1, stop_words='english')
vectors = vectorizer.fit_transform(text)

# Optionally project the TF-IDF matrix onto two SVD components; otherwise
# cluster the TF-IDF matrix directly.
lsa = TruncatedSVD(2)
X = lsa.fit_transform(vectors) if reduce_dimensionality else vectors

k_means = KMeans(
    n_clusters=N_CLUSTERS,
    init='k-means++',
    max_iter=100,
    n_init=1,
    verbose=True,
)
km = k_means.fit(X)
Initialization complete Iteration 0, inertia 55.010 Iteration 1, inertia 43.620 Iteration 2, inertia 42.596 Iteration 3, inertia 42.334 Iteration 4, inertia 42.189 Iteration 5, inertia 42.068 Iteration 6, inertia 41.976 Iteration 7, inertia 41.896 Iteration 8, inertia 41.823 Iteration 9, inertia 41.768 Iteration 10, inertia 41.717 Iteration 11, inertia 41.675 Iteration 12, inertia 41.617 Iteration 13, inertia 41.565 Iteration 14, inertia 41.545 Iteration 15, inertia 41.532 Iteration 16, inertia 41.520 Iteration 17, inertia 41.507 Iteration 18, inertia 41.494 Iteration 19, inertia 41.478 Iteration 20, inertia 41.458 Iteration 21, inertia 41.430 Iteration 22, inertia 41.384 Iteration 23, inertia 41.309 Iteration 24, inertia 41.166 Iteration 25, inertia 40.922 Iteration 26, inertia 40.615 Iteration 27, inertia 40.323 Iteration 28, inertia 40.076 Iteration 29, inertia 39.915 Iteration 30, inertia 39.809 Iteration 31, inertia 39.731 Iteration 32, inertia 39.676 Iteration 33, inertia 39.649 Iteration 34, inertia 39.635 Iteration 35, inertia 39.624 Iteration 36, inertia 39.611 Iteration 37, inertia 39.601 Iteration 38, inertia 39.593 Iteration 39, inertia 39.586 Iteration 40, inertia 39.579 Iteration 41, inertia 39.571 Iteration 42, inertia 39.562 Iteration 43, inertia 39.558 Converged at iteration 43
k_means_labels = k_means.labels_
k_means_cluster_centers = k_means.cluster_centers_
k_means_labels_unique = np.unique(k_means_labels)
terms = vectorizer.get_feature_names()
if reduce_dimensionality == False:
order_centroids = k_means_cluster_centers.argsort()[:, ::-1]
for i in range(N_CLUSTERS):
print [(terms[ind], k_means_cluster_centers[i][ind]) for ind in order_centroids[i, :100]]
print ""
for k in range(N_CLUSTERS):
z = vectors.toarray()[k_means_labels == k]
wordz_tfidf = [(terms[i], z[:,i].sum()) for i in range(z.shape[1])]
wordz_tfidf = sorted(wordz_tfidf, key=lambda x: x[1], reverse=True )
print wordz_tfidf[:100]
print ""
[(u'lord', 98.166686329454137), (u'master', 87.992903421941833), (u'king', 86.417228141490284), (u'brother', 62.734085101417911), (u'doctor', 49.950350683161602), (u'child', 49.6539755022592), (u'daughter', 49.474835442101373), (u'madame', 47.464176046913664), (u'war', 46.057368458975986), (u'sister', 44.063794614458814), (u'children', 42.38001294540318), (u'captain', 39.291449633931492), (u'prince', 39.240420784714772), (u'return', 38.619742058993552), (u'professor', 38.350708392844808), (u'forgive', 38.159126004400107), (u'general', 37.7188227993448), (u'il', 37.623688566480851), (u'sword', 36.903735481964667), (u'queen', 36.345847558836383), (u'madam', 36.255053534978728), (u'blood', 33.76066425438578), (u'fight', 33.3239474662251), (u'uncle', 33.323042316150634), (u'peace', 33.282213189053202), (u'village', 33.208947462947542), (u'marry', 33.20434950606014), (u'alright', 32.652671516456877), (u'earth', 32.637549376388932), (u'paris', 30.971632311217952), (u'fear', 30.92269903018942), (u'power', 29.125756022444445), (u'army', 28.605320252379926), (u'body', 28.481515611053673), (u'count', 28.406224776401356), (u'soul', 28.344065656038676), (u'lf', 28.293534797196919), (u'land', 27.83488359055578), (u'letter', 27.785725545527111), (u'ah', 27.758717731719067), (u'law', 27.612010659391149), (u'died', 27.566598467770852), (u'alive', 26.852529536084322), (u'palace', 26.851780814156832), (u'strange', 26.790929694884724), (u'priest', 26.477122567518663), (u'evil', 26.392173123375581), (u'dr', 26.346973378536035), (u'gentlemen', 26.150341940388738), (u'france', 26.107387943190794), (u'wine', 26.018223569640398), (u'quickly', 25.82244126990868), (u'words', 25.7726867772963), (u'goodbye', 25.546196257918091), (u'soldiers', 25.527329329179508), (u'attack', 24.947663650148762), (u'dare', 24.935881022114845), (u'enemy', 24.836143721218807), (u'city', 24.834608676988516), (u'marriage', 24.688555927470802), (u'heaven', 24.57086985297893), (u'court', 24.468604506328376), 
(u'police', 24.11540750468928), (u'devil', 24.071194773297961), (u'fool', 23.995063883930818), (u'anymore', 23.99308382479753), (u'music', 23.762656050431236), (u'holy', 23.493135567424027), (u'beg', 23.484087983976703), (u'follow', 23.471963832088388), (u'escape', 23.442706393194733), (u'accept', 23.370182329324038), (u'human', 23.20604140774104), (u'sea', 23.160864646419746), (u'welcome', 23.090340126613491), (u'gold', 22.920189007017001), (u'happiness', 22.798702274633872), (u'secret', 22.792581890442079), (u'mercy', 22.583039183886502), (u'ship', 22.386948742093384), (u'pray', 22.184788989026824), (u'loved', 22.099068383797295), (u'promise', 21.930702104541712), (u'person', 21.85233975894058), (u'german', 21.761274492439952), (u'ok', 21.684835985034006), (u'news', 21.667074453876083), (u'hell', 21.544458466120567), (u'sing', 21.530996830325314), (u'horse', 21.503560465630187), (u'dream', 21.364926666055268), (u'given', 21.33903145329981), (u'possible', 21.312688931764097), (u'sun', 21.274245624786445), (u'sake', 21.250641255285409), (u'calm', 21.247391490715962), (u'honor', 21.194368455826822), (u'write', 21.124735267073085), (u'act', 21.122372850815992), (u'school', 21.043781410262199)] [(u'gonna', 161.84550229849327), (u'ain', 122.7619020961188), (u'uh', 83.598679243389427), (u'okay', 80.136981782766185), (u'em', 59.880793571960204), (u'huh', 57.938751894394258), (u'gotta', 57.032350278322653), (u'wanna', 55.926115780950745), (u'ya', 46.129738794174514), (u'guy', 45.91604319976009), (u'kid', 44.684743957504992), (u'boys', 39.026440477154708), (u'joe', 37.979969181035287), (u'honey', 36.726462822180011), (u'mrs', 36.340216863791731), (u'town', 36.326354987141031), (u'ma', 33.663398225457605), (u'baby', 32.770838557724588), (u'hi', 29.74996928059312), (u'somebody', 29.008058644905184), (u'gun', 28.647541889049393), (u'guys', 27.085612837578871), (u'car', 26.095637084381885), (u'anybody', 22.792936002866796), (u'doc', 22.771985122440192), (u'hmm', 
22.650167502634169), (u'horse', 22.454534770834975), (u'bet', 21.269709223533013), (u'folks', 21.206494183888083), (u'couple', 20.898596364267664), (u'bye', 20.33367139426646), (u'ha', 20.159212865178439), (u'ought', 19.445082909716177), (u'suppose', 19.197442375353539), (u'darling', 19.095881143540083), (u'charlie', 18.838439582134384), (u'hit', 18.720820848150776), (u'dad', 18.705058828353401), (u'gee', 18.664376373144975), (u'frank', 18.429420896789871), (u'kids', 18.406032104856504), (u'till', 18.329340293364286), (u'captain', 18.085648176418577), (u'stuff', 18.045284465553738), (u'000', 18.043093917950568), (u'shot', 17.424274360823897), (u'mm', 17.375372908921364), (u'pick', 17.191526391961574), (u'jack', 17.068030899755925), (u'police', 16.797409860944356), (u'ride', 16.785360962237501), (u'fight', 16.715653171826069), (u'swell', 16.668687757828582), (u'phone', 16.513659668871512), (u'00', 16.490572669217038), (u'funny', 16.482091381671257), (u'bank', 16.305875714539908), (u'10', 16.261802879042634), (u'pop', 16.204671623198482), (u'girls', 16.18605499313384), (u'george', 16.184555293693094), (u'york', 16.18244193879654), (u'week', 16.077636488089468), (u'brother', 15.993776775122305), (u'fellas', 15.923736683215134), (u'office', 15.914925692189392), (u'doctor', 15.676298607845636), (u'fella', 15.674513712535814), (u'boss', 15.600815301906838), (u'dollars', 15.492344446117933), (u'shoot', 15.436952679270158), (u'figure', 15.36609997815337), (u'wonderful', 15.338803580776451), (u'gentlemen', 15.264614314158976), (u'number', 15.093516418709395), (u'john', 15.062816054670659), (u'lieutenant', 14.830829390491669), (u'school', 14.669446031570736), (u'mom', 14.623329754686853), (u'coffee', 14.605980386424294), (u'mama', 14.526322039485297), (u'train', 14.494903118890932), (u'tough', 14.458855781278725), (u'deal', 14.456313756087173), (u'city', 14.426503852492708), (u'party', 14.400646653180232), (u'bucks', 14.361534086805896), (u'hell', 13.98803613120468), 
(u'street', 13.970775936209744), (u'hot', 13.893859173273142), (u'ah', 13.705145995123925), (u'beat', 13.700710169084614), (u'picture', 13.477086622564725), (u'game', 13.472003757571109), (u'certainly', 13.451912016993619), (u'law', 13.451297071976203), (u'dog', 13.425250262649492), (u'boat', 13.375937756829597), (u'check', 13.323123345055546), (u'line', 13.268444468671213)] [(u'mrs', 80.778055109660713), (u'doctor', 78.751160245974404), (u'police', 77.353771964761279), (u'car', 75.93989498994803), (u'ok', 70.23984099623253), (u'darling', 65.890574892253056), (u'goodbye', 64.377442625787097), (u'ah', 63.868804397011033), (u'bye', 62.34597493023827), (u'alright', 60.192429725917876), (u'dr', 59.824923596323075), (u'okay', 59.547344992358759), (u'000', 55.755744841630758), (u'brother', 55.285016227948418), (u'war', 54.620577355348118), (u'child', 53.813976462973798), (u'captain', 53.592929318460151), (u'dad', 51.786135021933717), (u'children', 48.750266474203414), (u'madam', 48.428769391443211), (u'boss', 47.812049910174501), (u'gentlemen', 47.746676062943898), (u'girls', 47.557633627225655), (u'paris', 46.148995535121479), (u'sister', 45.69372582997066), (u'daughter', 45.143083549645837), (u'school', 44.704587584127445), (u'guy', 44.417345954150505), (u'damn', 43.183431132392855), (u'anymore', 42.594616275535678), (u'uncle', 42.462916409057655), (u'train', 41.898225670446813), (u'professor', 41.315343257319711), (u'eh', 40.517713671353761), (u'marry', 39.090134194494624), (u'10', 38.635899417110338), (u'stupid', 38.426252738834464), (u'dance', 37.655620014237115), (u'madame', 37.248865691469248), (u'hotel', 36.886164646816319), (u'mom', 36.586392208411183), (u'number', 36.133510457559211), (u'wonderful', 35.852226286465324), (u'write', 35.794733441041963), (u'suppose', 35.674959637545982), (u'letter', 35.373206804457226), (u'london', 35.150880299670426), (u'general', 35.072156654250456), (u'strange', 34.922837939552124), (u'fun', 34.92164214150111), (u'hell', 
34.74687100354582), (u'colonel', 34.610486960985149), (u'baby', 34.509564136218046), (u'lf', 33.887431626832758), (u'person', 33.698516343452937), (u'tired', 33.614981355675269), (u'office', 33.453528697839921), (u'boat', 33.368624669104349), (u'careful', 33.008225440076124), (u'hi', 32.981297289940088), (u'music', 32.781936803989595), (u'funny', 32.635177324264049), (u'ma', 32.591850819590746), (u'forgive', 32.363214627552168), (u'dinner', 32.215057436441015), (u'party', 32.134109245564275), (u'aunt', 32.083076916299802), (u'shoot', 31.882453722253846), (u'calm', 31.701881315853672), (u'fool', 31.55706567406471), (u'law', 31.537327023654534), (u'story', 31.454580494157042), (u'mad', 31.379624961294347), (u'yesterday', 31.34308070988132), (u'phone', 31.205204579961666), (u'tea', 31.148052455131801), (u'small', 30.877794027384731), (u'town', 30.87261836370822), (u'coffee', 30.653814767267207), (u'lovely', 30.37473975981878), (u'scared', 30.268542637253866), (u'lord', 30.193427250231068), (u'company', 30.066540205969883), (u'guys', 30.006822281321003), (u'kiss', 30.002883676898083), (u'months', 29.998958490925602), (u'died', 29.993072770027972), (u'sick', 29.983694226464852), (u'murder', 29.919617870362696), (u'fight', 29.897614827534355), (u'certainly', 29.847455469344354), (u'huh', 29.703762341975146), (u'idiot', 29.700302212027932), (u'body', 29.594252095403323), (u'question', 29.515483558251969), (u'quick', 29.275363169138828), (u'sergeant', 29.197317061869228), (u'boys', 29.163262226953805), (u'german', 29.107141965164395), (u'fault', 29.075271216246829)] [(u'00', 61.241217838360988), (u'il', 36.30973410810288), (u'film', 33.451323379100472), (u'captain', 23.584632234086662), (u'brother', 20.703125876235031), (u'ok', 20.690674732657271), (u'gold', 19.992385336378327), (u'sea', 19.472720655472919), (u'village', 19.207294696916204), (u'city', 18.520323048318659), (u'music', 18.465933387164796), (u'boss', 18.404623150200422), (u'sister', 17.739331618841533), 
(u'director', 17.731259612454327), (u'daughter', 17.23925563740881), (u'child', 16.650342230320717), (u'la', 16.649290816214879), (u'act', 16.635133590528447), (u'dr', 16.510830671826803), (u'000', 16.503997134752527), (u'colonel', 16.464349305435469), (u'master', 16.140836277034794), (u'ha', 16.049521363384052), (u'war', 16.026749530565692), (u'san', 15.824401010655748), (u'german', 15.79883989003563), (u'children', 15.640149564768709), (u'land', 15.464394540294895), (u'doctor', 15.191657215125977), (u'fight', 15.176828649485056), (u'mary', 14.902008527790047), (u'police', 14.871716846405359), (u'professor', 14.690805542954394), (u'ship', 14.594393131070539), (u'prince', 14.475868331784334), (u'okay', 14.243486911968038), (u'chief', 13.974574470693781), (u'uncle', 13.925146143235338), (u'mountain', 13.916842636011328), (u'school', 13.795774165624707), (u'return', 13.791412836174471), (u'horse', 13.515227063656367), (u'paris', 13.437367388602642), (u'song', 13.417257041127264), (u'black', 13.359408010566606), (u'dad', 13.172501019998835), (u'goodbye', 13.030411887879859), (u'john', 12.968469933304995), (u'island', 12.923736048739933), (u'general', 12.905286804294505), (u'story', 12.834998087099105), (u'damn', 12.755886294104339), (u'town', 12.693362024411272), (u'lieutenant', 12.677018695065238), (u'ah', 12.435499869846845), (u'marry', 12.298391665411941), (u'king', 12.292609154369345), (u'red', 12.224024949966974), (u'earth', 12.042473890456684), (u'dance', 11.951296272157499), (u'lord', 11.950620038741686), (u'alright', 11.913208097628008), (u'car', 11.848353954886141), (u'madame', 11.78675614554135), (u'dog', 11.765869011269167), (u'white', 11.749010738794343), (u'fish', 11.666899287286189), (u'mrs', 11.551047549964597), (u'gentlemen', 11.420062818371981), (u'train', 11.393892366376917), (u'blood', 11.302063641404215), (u'died', 11.280279265997093), (u'anymore', 11.27986972526665), (u'farm', 11.210366565802193), (u'sing', 11.207733136761743), (u'guy', 
11.069247906021582), (u'power', 10.930564192398847), (u'army', 10.923550326153263), (u'happiness', 10.866966843867525), (u'small', 10.848660306274772), (u'sword', 10.812056650286806), (u'boys', 10.768138508440829), (u'boat', 10.739462879484375), (u'party', 10.72831468457059), (u'directed', 10.727635826685965), (u'bye', 10.629549068792148), (u'enemy', 10.584205199673409), (u'letter', 10.582579944535453), (u'mom', 10.556716881111452), (u'sun', 10.515508334314228), (u'gonna', 10.473946136105873), (u'person', 10.397435360690531), (u'gun', 10.32176078318329), (u'secret', 10.268998045519972), (u'major', 10.199282596814395), (u'subtitles', 10.134087437326812), (u'alive', 10.133741954962158), (u'follow', 10.077238082504584), (u'count', 9.8624948394607959), (u'art', 9.8522805898439465)] [(u'gonna', 107.51743808417224), (u'okay', 83.935160877685561), (u'mrs', 74.275636102465796), (u'uh', 67.641783541518151), (u'ain', 57.909289758974467), (u'huh', 48.474639389936804), (u'guy', 48.226697872024843), (u'darling', 45.763478338705923), (u'car', 43.70648522870728), (u'wanna', 39.170665567706436), (u'ok', 38.910269087424211), (u'town', 38.494505695152199), (u'baby', 38.039119812473189), (u'dad', 37.913317295473831), (u'captain', 37.561958650628718), (u'joe', 37.124626310256701), (u'em', 36.929787450242884), (u'gun', 36.836452412287173), (u'boys', 36.395714896846187), (u'gotta', 35.835340404035847), (u'dr', 35.324404584919513), (u'kid', 34.975280253648123), (u'police', 34.820822434390521), (u'somebody', 34.206230100254231), (u'doctor', 34.128793679387059), (u'george', 33.845813279541126), (u'hi', 33.826561633435809), (u'ma', 32.875332229082446), (u'honey', 32.697703855820492), (u'suppose', 31.913058552886248), (u'000', 31.81812146654655), (u'goodbye', 30.953483877846892), (u'john', 30.766020240590329), (u'bye', 30.695466624497026), (u'guys', 29.866236691313489), (u'charlie', 29.17315453562647), (u'lieutenant', 28.894325317513633), (u'ha', 28.411268944033498), (u'wonderful', 
28.410120860524898), (u'phone', 28.264827089323671), (u'brother', 28.199932742455708), (u'anybody', 27.869643953059693), (u'colonel', 27.721705537664047), (u'york', 27.632859722989583), (u'jack', 26.742294550326754), (u'horse', 26.501798906148988), (u'lf', 26.453738131430224), (u'00', 26.297976780569783), (u'boss', 26.13138289842313), (u'office', 26.071981208283372), (u'girls', 26.06022340076747), (u'gentlemen', 25.943786316573672), (u'ought', 25.606042472908978), (u'ah', 25.54476343288859), (u'doc', 25.427510999354574), (u'war', 25.227993363851748), (u'bet', 24.666091745663024), (u'shot', 24.646189916634263), (u'10', 24.599818387904165), (u'number', 24.439797211458831), (u'certainly', 24.329185665028245), (u'hell', 24.196137934753235), (u'funny', 23.811826176848616), (u'hotel', 23.719050237377722), (u'dance', 23.673757363931657), (u'dinner', 23.670872320099395), (u'week', 23.616167627784947), (u'boat', 23.584595525523277), (u'hmm', 23.434848655290345), (u'kids', 23.271255228672366), (u'till', 23.242648079107227), (u'couple', 22.966225494430503), (u'hit', 22.665843790868344), (u'pop', 22.341781354019218), (u'murder', 22.275183536159258), (u'party', 22.274811324728837), (u'check', 22.029274793903106), (u'train', 22.011934171945477), (u'fight', 21.864380383778656), (u'professor', 21.858784176726481), (u'stuff', 21.827162989797294), (u'ya', 21.700743508850021), (u'pick', 21.671081446188701), (u'story', 21.579452582270136), (u'school', 21.546243547325428), (u'sergeant', 21.337786242281311), (u'daddy', 21.314074959772448), (u'city', 21.193060556723164), (u'probably', 21.068544531616546), (u'marry', 21.025524266685686), (u'uncle', 20.876262647152835), (u'mary', 20.647851132735077), (u'deal', 20.3727703949573), (u'mom', 20.219730848553091), (u'music', 20.163161185311282), (u'shoot', 20.122747703706082), (u'dollars', 19.94223719554984), (u'children', 19.805535044032005), (u'white', 19.774816849663871), (u'line', 19.639365499679748)]
# Interactive scatter plot of the clustering: one marker per movie at
# (X[:, 0], X[:, 1]), coloured by cluster, with an mpld3 HTML tooltip per
# point and a larger marker for each cluster centre.
# NOTE(review): indentation was lost in this export, so the nesting of the
# statements below (in particular whether mpld3.disable_notebook() belongs
# inside the `if`) cannot be read off this text -- confirm against the
# original notebook.
if reduce_dimensionality == True:
mpld3.enable_notebook()
fig,ax = plt.subplots(figsize=(15,10)) #.figure(figsize=(20,10))
# ax = fig.add_subplot(1,1,1)
# ax.grid(True, alpha=0.3)
# One random RGB triple per cluster.
colors = [(random.random(), random.random(), random.random()) for x in range(N_CLUSTERS)]
for k, col in zip(range(N_CLUSTERS), colors):
# Boolean mask selecting the samples assigned to cluster k.
my_members = k_means_labels == k
cluster_center = k_means_cluster_centers[k]
points = ax.plot(X[my_members, 0], X[my_members, 1], 'w', markerfacecolor=col, marker='.', label='Cluster %i' % k)
centers = ax.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col, markeredgecolor='k', markersize=6)
# Tooltip text for each movie of this cluster: title, osID, imdbID and the
# comma-separated genres.
labels = []
for movie in movies[k_means_labels == k]:
# NOTE(review): the trailing movie.get('', '') looks up the empty-string key
# and falls back to ''; it looks like an unfinished placeholder -- confirm.
labels.append(movie.get('Title', '') + " " + movie.get('osID', '') + " " + movie.get('imdbID', '') + " " + ", ".join(movie.get('Genre', '')) + " " + movie.get('', '') + " ")
# Attach the labels to the first object returned by ax.plot for the points.
tooltip = plugins.PointHTMLTooltip(points[0], labels, voffset=10, hoffset=10)
plugins.connect(fig, tooltip)
ax.set_title('KMeans')
# Empty tuples remove all tick marks from both axes.
ax.set_xticks(())
ax.set_yticks(())
ax.legend()
mpld3.disable_notebook()
# find_movie(3909)
# Per-cluster genre counts: genre_histogram[k] maps each genre to the number
# of movies in cluster k that carry it. Every genre in `genres` is present in
# each dict, starting at 0.
genre_histogram = []
for k in range(N_CLUSTERS):
    innerDict = dict.fromkeys(genres, 0)
    for movie in movies[k_means_labels == k]:
        # Movies without a 'Genre' entry contribute nothing.
        current_genres = movie.get('Genre', '')
        for genre in current_genres:
            # A genre missing from `genres` must start at 1, not 0, or its
            # first occurrence would be lost.
            innerDict[genre] = innerDict.get(genre, 0) + 1
    genre_histogram.append(innerDict)
# Per-cluster release-year counts: year_histogram[k] maps each 'Year' value
# seen in cluster k to the number of movies with that value. Movies without
# a 'Year' entry are counted under the empty string.
year_histogram = []
for k in range(N_CLUSTERS):
    innerDict = {}
    for movie in movies[k_means_labels == k]:
        year = movie.get('Year', '')
        # The first occurrence of a year counts as 1. Initialising it to 0
        # would leave every year's total one too low.
        innerDict[year] = innerDict.get(year, 0) + 1
    year_histogram.append(innerDict)
# make_histogram(genre_histogram[0])
# make_histogram(genre_histogram[1])
# make_histogram(genre_histogram[2])