#!/usr/bin/env python
# coding: utf-8
#
#
#
# ### Which genre contains the longest movies on average, and how long are they?
#
# ### Find the highest ranked movie per genre
# ### Find the longest movie per year
# ### Can you print the previous output sorted by year?
#
#
#
# ### Answers
# Which genre contains the longest movies on average, and how long are they?
# In[1]:
# Collect every runtime per genre, then report the genre whose movies are
# longest on average. Fields are '|'-separated; index 3 is read as the runtime
# and index 5 as a comma-separated genre list. The runtime is treated as
# seconds (it is divided down to hours and minutes for display).
with open('../../downloads/250.imdb', 'r', encoding='utf-8') as fh:
    genreDict = {}  # lower-cased genre -> list of runtimes
    for line in fh:
        # Skip header/comment lines and blank lines (a blank line would
        # otherwise fail on the column lookups below).
        if line.startswith('#') or not line.strip():
            continue
        cols = line.strip().split('|')
        runtime = int(cols[3])
        for entry in cols[5].strip().split(','):
            # Strip each token so padding around the commas cannot create
            # separate keys for the same genre.
            genreDict.setdefault(entry.strip().lower(), []).append(runtime)

longest = [0, '']  # [highest average runtime seen so far, its genre]
for genre, runtimes in genreDict.items():
    average = sum(runtimes) / len(runtimes)
    if average > longest[0]:  # strict '>' keeps the first genre on ties
        longest = [average, genre]

# Round to whole minutes *before* splitting into hours and minutes, so a
# value such as 7199.6 s is shown as 2h0min instead of 1h60min.
hours, minutes = divmod(round(longest[0] / 60), 60)
print('{} is the genre with the longest movies at {}h{}min'.format(
    longest[1], hours, minutes))
# Find the highest ranked movie per genre
# In[3]:
# For every genre keep the movie with the highest rating. Fields are
# '|'-separated; index 1 is read as the rating, index 5 as a comma-separated
# genre list and index 6 as the movie title.
with open('../../downloads/250.imdb', 'r', encoding='utf-8') as fh:
    genreDict = {}  # lower-cased genre -> [best rating, movie title]
    for line in fh:
        # Skip header/comment lines and blank lines (a blank line would
        # otherwise fail on the column lookups below).
        if line.startswith('#') or not line.strip():
            continue
        cols = line.strip().split('|')
        rating = float(cols[1].strip())
        movie = cols[6].strip()
        for entry in cols[5].strip().split(','):
            # Strip each token so padding around the commas cannot create
            # separate keys for the same genre.
            key = entry.strip().lower()
            best = genreDict.get(key)
            # First movie seen for this genre, or a strictly higher rating
            # (on equal ratings the earlier movie in the file is kept).
            if best is None or rating > best[0]:
                genreDict[key] = [rating, movie]

for genre in genreDict:
    print('The best movie for {} is:\n\t{} ({})'.format(
        genre.capitalize(), genreDict[genre][1], genreDict[genre][0]))
# Find the longest movie per year
# In[5]:
# For every year keep the longest movie. Fields are '|'-separated; index 2 is
# read as the year, index 3 as the runtime and index 6 as the movie title.
# The runtime is treated as seconds (it is divided down to hours and minutes
# for display).
with open('../../downloads/250.imdb', 'r', encoding='utf-8') as fh:
    yearDict = {}  # year -> [longest runtime, movie title]
    for line in fh:
        # Skip header/comment lines and blank lines (a blank line would
        # otherwise fail on the column lookups below).
        if line.startswith('#') or not line.strip():
            continue
        cols = line.strip().split('|')
        year = int(cols[2].strip())
        length = int(cols[3].strip())
        movie = cols[6].strip()
        current = yearDict.get(year)
        # First movie seen for this year, or a strictly longer one
        # (on equal lengths the earlier movie in the file is kept).
        if current is None or length > current[0]:
            yearDict[year] = [length, movie]

for year in yearDict:
    # Round to whole minutes *before* splitting into hours and minutes, so
    # the minute part can never be displayed as 60.
    hours, minutes = divmod(round(yearDict[year][0] / 60), 60)
    print('The longest movie for {} is:\n\t{} ({}h{}min)'.format(
        year, yearDict[year][1], hours, minutes))
# Can you print the previous output sorted by year?
# In[6]:
# Same as the longest-movie-per-year report, but printed in ascending year
# order. Fields are '|'-separated; index 2 is read as the year, index 3 as the
# runtime and index 6 as the movie title. The runtime is treated as seconds
# (it is divided down to hours and minutes for display).
with open('../../downloads/250.imdb', 'r', encoding='utf-8') as fh:
    yearDict = {}  # year -> [longest runtime, movie title]
    for line in fh:
        # Skip header/comment lines and blank lines (a blank line would
        # otherwise fail on the column lookups below).
        if line.startswith('#') or not line.strip():
            continue
        cols = line.strip().split('|')
        year = int(cols[2].strip())
        length = int(cols[3].strip())
        movie = cols[6].strip()
        current = yearDict.get(year)
        # First movie seen for this year, or a strictly longer one
        # (on equal lengths the earlier movie in the file is kept).
        if current is None or length > current[0]:
            yearDict[year] = [length, movie]

# Iterating a dict yields its keys, so sorted() can take it directly.
sorted_years = sorted(yearDict)
for year in sorted_years:
    # Round to whole minutes *before* splitting into hours and minutes, so
    # the minute part can never be displayed as 60.
    hours, minutes = divmod(round(yearDict[year][0] / 60), 60)
    print('The longest movie for {} is:\n\t{} ({}h{}min)'.format(
        year, yearDict[year][1], hours, minutes))