import csv
# Load the Hacker News dataset into a list of rows (each row is a list of
# strings). A context manager closes the file handle as soon as the data has
# been read, and newline='' is the mode the csv module documents for files
# handed to csv.reader, so quoted fields with embedded line breaks parse
# correctly.
with open('hacker_news.csv', newline='') as file:
    hn = list(csv.reader(file))
# Preview the first five rows (the header row is still included here).
print(hn[:5])
[['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at'], ['12224879', 'Interactive Dynamic Video', 'http://www.interactivedynamicvideo.com/', '386', '52', 'ne0phyte', '8/4/2016 11:52'], ['10975351', 'How to Use Open Source and Shut the Fuck Up at the Same Time', 'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/', '39', '10', 'josep2', '1/26/2016 19:30'], ['11964716', "Florida DJs May Face Felony for April Fools' Water Joke", 'http://www.thewire.com/entertainment/2013/04/florida-djs-april-fools-water-joke/63798/', '2', '1', 'vezycash', '6/23/2016 22:20'], ['11919867', 'Technology ventures: From Idea to Enterprise', 'https://www.amazon.com/Technology-Ventures-Enterprise-Thomas-Byers/dp/0073523429', '3', '1', 'hswarna', '6/17/2016 0:01']]
# Separate the column-name row from the data rows in a single assignment:
# the first row becomes `headers`, everything after it stays in `hn`.
headers, hn = hn[0], hn[1:]
print(headers)
print(hn[:4])
['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at'] [['12224879', 'Interactive Dynamic Video', 'http://www.interactivedynamicvideo.com/', '386', '52', 'ne0phyte', '8/4/2016 11:52'], ['10975351', 'How to Use Open Source and Shut the Fuck Up at the Same Time', 'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/', '39', '10', 'josep2', '1/26/2016 19:30'], ['11964716', "Florida DJs May Face Felony for April Fools' Water Joke", 'http://www.thewire.com/entertainment/2013/04/florida-djs-april-fools-water-joke/63798/', '2', '1', 'vezycash', '6/23/2016 22:20'], ['11919867', 'Technology ventures: From Idea to Enterprise', 'https://www.amazon.com/Technology-Ventures-Enterprise-Thomas-Byers/dp/0073523429', '3', '1', 'hswarna', '6/17/2016 0:01']]
# Split the posts into three groups based on how the (case-insensitive)
# title begins: "ask hn", "show hn", or anything else.
ask_posts, show_posts, other_posts = [], [], []
for post in hn:
    lowered_title = post[1].lower()
    if lowered_title.startswith('ask hn'):
        bucket = ask_posts
    elif lowered_title.startswith('show hn'):
        bucket = show_posts
    else:
        bucket = other_posts
    bucket.append(post)

# Report the size of each group, one count per line.
for group in (ask_posts, show_posts, other_posts):
    print(len(group))
1744 1162 17194
# Peek at the first five rows collected in ask_posts.
print(ask_posts[:5])
[['12296411', 'Ask HN: How to improve my personal website?', '', '2', '6', 'ahmedbaracat', '8/16/2016 9:55'], ['10610020', 'Ask HN: Am I the only one outraged by Twitter shutting down share counts?', '', '28', '29', 'tkfx', '11/22/2015 13:43'], ['11610310', 'Ask HN: Aby recent changes to CSS that broke mobile?', '', '1', '1', 'polskibus', '5/2/2016 10:14'], ['12210105', 'Ask HN: Looking for Employee #3 How do I do it?', '', '1', '3', 'sph130', '8/2/2016 14:20'], ['10394168', 'Ask HN: Someone offered to buy my browser extension from me. What now?', '', '28', '17', 'roykolak', '10/15/2015 16:38']]
# Peek at the first five rows collected in show_posts.
print(show_posts[:5])
[['10627194', 'Show HN: Wio Link ESP8266 Based Web of Things Hardware Development Platform', 'https://iot.seeed.cc', '26', '22', 'kfihihc', '11/25/2015 14:03'], ['10646440', 'Show HN: Something pointless I made', 'http://dn.ht/picklecat/', '747', '102', 'dhotson', '11/29/2015 22:46'], ['11590768', 'Show HN: Shanhu.io, a programming playground powered by e8vm', 'https://shanhu.io', '1', '1', 'h8liu', '4/28/2016 18:05'], ['12178806', 'Show HN: Webscope Easy way for web developers to communicate with Clients', 'http://webscopeapp.com', '3', '3', 'fastbrick', '7/28/2016 7:11'], ['10872799', 'Show HN: GeoScreenshot Easily test Geo-IP based web pages', 'https://www.geoscreenshot.com/', '1', '9', 'kpsychwave', '1/9/2016 20:45']]
# Re-display the column names as a reminder of each field's position.
print(headers)
['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at']
# Average number of comments per post in ask_posts. The comment count is
# stored as a string at index 4 of each row, so it is converted to int
# before summing.
total_ask_comments = sum(int(post[4]) for post in ask_posts)
avg_ask_comments = total_ask_comments / len(ask_posts)
print(avg_ask_comments)
14.038417431192661
# Average number of comments per post in show_posts. The comment count is
# stored as a string at index 4 of each row, so it is converted to int
# before summing.
total_show_comments = sum(int(post[4]) for post in show_posts)
avg_show_comments = total_show_comments / len(show_posts)
print(avg_show_comments)
10.31669535283993
# Re-display the column names as a reminder of each field's position.
print(headers)
['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at']
# Show a single sample row from ask_posts.
print(ask_posts[0])
['12296411', 'Ask HN: How to improve my personal website?', '', '2', '6', 'ahmedbaracat', '8/16/2016 9:55']
import datetime as dt
# Build a list of [created_at, num_comments] pairs from ask_posts:
#   element 0: creation timestamp, kept as the raw string from column 6
#   element 1: number of comments, converted to int from column 4
result_list = [[post[6], int(post[4])] for post in ask_posts]
print(result_list[:2])
[['8/16/2016 9:55', 6], ['11/22/2015 13:43', 29]]
# Tally, per hour of day, how many posts in result_list were created and how
# many comments those posts received in total.
counts_by_hour = {}    # hour -> number of posts created in that hour
comments_by_hour = {}  # hour -> total comments on posts created in that hour
for created_at, n_comments in result_list:
    # Timestamps look like "8/16/2016 9:55"; only the hour is needed.
    hour = dt.datetime.strptime(created_at, "%m/%d/%Y %H:%M").hour
    counts_by_hour[hour] = counts_by_hour.get(hour, 0) + 1
    comments_by_hour[hour] = comments_by_hour.get(hour, 0) + n_comments
print(counts_by_hour)
{9: 45, 13: 85, 10: 59, 14: 107, 16: 108, 23: 68, 12: 73, 17: 100, 15: 116, 21: 109, 20: 80, 2: 58, 18: 109, 3: 54, 5: 46, 19: 110, 1: 60, 22: 71, 8: 48, 4: 47, 0: 55, 6: 44, 7: 34, 11: 58}
# Display the per-hour comment totals.
print(comments_by_hour)
{9: 251, 13: 1253, 10: 793, 14: 1416, 16: 1814, 23: 543, 12: 687, 17: 1146, 15: 4477, 21: 1745, 20: 1722, 2: 1381, 18: 1439, 3: 421, 5: 464, 19: 1188, 1: 683, 22: 479, 8: 492, 4: 337, 0: 447, 6: 397, 7: 267, 11: 641}
# Average comments per post for each hour:
#   total comments in that hour / number of posts in that hour.
# Each entry is [hour, average].
avg_by_hour = [
    [hour, comments_by_hour[hour] / counts_by_hour[hour]]
    for hour in counts_by_hour
]
print(avg_by_hour)
[[9, 5.5777777777777775], [13, 14.741176470588234], [10, 13.440677966101696], [14, 13.233644859813085], [16, 16.796296296296298], [23, 7.985294117647059], [12, 9.41095890410959], [17, 11.46], [15, 38.5948275862069], [21, 16.009174311926607], [20, 21.525], [2, 23.810344827586206], [18, 13.20183486238532], [3, 7.796296296296297], [5, 10.08695652173913], [19, 10.8], [1, 11.383333333333333], [22, 6.746478873239437], [8, 10.25], [4, 7.170212765957447], [0, 8.127272727272727], [6, 9.022727272727273], [7, 7.852941176470588], [11, 11.051724137931034]]
# Build a second list of lists with the two columns swapped, so each entry
# is [average, hour]. Putting the average first facilitates sorting, because
# sorting a list of lists compares entries by their first element.
swap_avg_by_hour = [[average, hour] for hour, average in avg_by_hour]
print(swap_avg_by_hour)
[[5.5777777777777775, 9], [14.741176470588234, 13], [13.440677966101696, 10], [13.233644859813085, 14], [16.796296296296298, 16], [7.985294117647059, 23], [9.41095890410959, 12], [11.46, 17], [38.5948275862069, 15], [16.009174311926607, 21], [21.525, 20], [23.810344827586206, 2], [13.20183486238532, 18], [7.796296296296297, 3], [10.08695652173913, 5], [10.8, 19], [11.383333333333333, 1], [6.746478873239437, 22], [10.25, 8], [7.170212765957447, 4], [8.127272727272727, 0], [9.022727272727273, 6], [7.852941176470588, 7], [11.051724137931034, 11]]
# Order the [average, hour] pairs from highest to lowest average. A copy is
# sorted so that swap_avg_by_hour itself keeps its original order.
sorted_swap = list(swap_avg_by_hour)
sorted_swap.sort(reverse=True)
print(sorted_swap[:5])
[[38.5948275862069, 15], [23.810344827586206, 2], [21.525, 20], [16.796296296296298, 16], [16.009174311926607, 21]]
# Report the five hours with the highest average comments per post.
for average, hour in sorted_swap[:5]:
    # Zero-pad the hour to two digits and append ":00" (e.g. 2 -> "02:00").
    hour_label = "{:02d}:00".format(hour)
    rounded_average = round(average, 2)
    print("{}: {} average comments per post".format(hour_label, rounded_average))
15:00: 38.59 average comments per post 02:00: 23.81 average comments per post 20:00: 21.52 average comments per post 16:00: 16.8 average comments per post 21:00: 16.01 average comments per post