from csv import reader # import reader
# Load the Hacker News dataset into memory as a list of rows (each row is a
# list of strings). The `with` block guarantees the file handle is closed once
# the rows have been read; `newline=""` is what the csv module expects so that
# newlines embedded in quoted fields are parsed correctly.
with open("hacker_news.csv", encoding="utf-8", newline="") as opened:
    read = reader(opened)  # csv reader over the open file
    hn = list(read)  # materialize every row before the file is closed

header = hn[0]  # the first row holds the column names
hn = hn[1:]  # keep only the data rows, without the header
print(header)  # show the column names
print(hn[:5])  # show the first five data rows
['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at'] [['12224879', 'Interactive Dynamic Video', 'http://www.interactivedynamicvideo.com/', '386', '52', 'ne0phyte', '8/4/2016 11:52'], ['10975351', 'How to Use Open Source and Shut the Fuck Up at the Same Time', 'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/', '39', '10', 'josep2', '1/26/2016 19:30'], ['11964716', "Florida DJs May Face Felony for April Fools' Water Joke", 'http://www.thewire.com/entertainment/2013/04/florida-djs-april-fools-water-joke/63798/', '2', '1', 'vezycash', '6/23/2016 22:20'], ['11919867', 'Technology ventures: From Idea to Enterprise', 'https://www.amazon.com/Technology-Ventures-Enterprise-Thomas-Byers/dp/0073523429', '3', '1', 'hswarna', '6/17/2016 0:01'], ['10301696', 'Note by Note: The Making of Steinway L1037 (2007)', 'http://www.nytimes.com/2007/11/07/movies/07stein.html?_r=0', '8', '2', 'walterbell', '9/30/2015 4:12']]
# Split the posts into three buckets according to how the lower-cased title
# (column 1) begins: "ask hn", "show hn", or anything else.
ask_posts, show_posts, other_posts = [], [], []
for row in hn:
    title = row[1].lower()
    if title.startswith("ask hn"):
        bucket = ask_posts
    elif title.startswith("show hn"):
        bucket = show_posts
    else:
        bucket = other_posts
    bucket.append(row)

# Report how many posts landed in each bucket (ask, show, other).
for posts in (ask_posts, show_posts, other_posts):
    print(len(posts))
1744 1162 17194
# Average number of comments per post for Ask HN posts and for Show HN posts.
# The comment count is read from column 4 of each row and converted to int.
total_ask_comments = sum(int(post[4]) for post in ask_posts)
avg_ask_comments = total_ask_comments / len(ask_posts)

total_show_comments = sum(int(post[4]) for post in show_posts)
avg_show_comments = total_show_comments / len(show_posts)

print("average of ask posts comments", ":", avg_ask_comments)
print("average of show posts comments", ":", avg_show_comments)
average of ask posts comments : 14.038417431192661 average of show posts comments : 10.31669535283993
The code above computes the average number of comments for Show HN posts and Ask HN posts. Ask HN posts average approximately 14.04 comments per post, while Show HN posts average approximately 10.32, which means the average for Ask HN posts is higher than for Show HN posts.
Therefore, Ask HN posts receive more comments on average than Show HN posts.
The analysis below is done only for Ask HN posts, since they receive the most comments on average.
import datetime as dt
# For every Ask HN post, pair its creation timestamp (column 6) with its
# comment count (column 4, converted to int).
result_list = [[post[6], int(post[4])] for post in ask_posts]
# Tally, per hour of day, how many Ask HN posts were created
# (counts_by_hour) and how many comments those posts received in total
# (comments_by_hour). Keys are zero-padded hour strings such as "09".
counts_by_hour = {}
comments_by_hour = {}
form = "%m/%d/%Y %H:%M"  # layout of the timestamp strings in result_list
for stamp, n_comments in result_list:
    hour = dt.datetime.strptime(stamp, form).strftime("%H")
    counts_by_hour[hour] = counts_by_hour.get(hour, 0) + 1
    comments_by_hour[hour] = comments_by_hour.get(hour, 0) + n_comments
comments_by_hour
{'09': 251, '13': 1253, '10': 793, '14': 1416, '16': 1814, '23': 543, '12': 687, '17': 1146, '15': 4477, '21': 1745, '20': 1722, '02': 1381, '18': 1439, '03': 421, '05': 464, '19': 1188, '01': 683, '22': 479, '08': 492, '04': 337, '00': 447, '06': 397, '07': 267, '11': 641}
# Average comments per post for each hour, as [hour, average] pairs:
# total comments in that hour divided by the number of posts in that hour.
avg_by_hour = [
    [hour, total / counts_by_hour[hour]]
    for hour, total in comments_by_hour.items()
]
avg_by_hour
[['09', 5.5777777777777775], ['13', 14.741176470588234], ['10', 13.440677966101696], ['14', 13.233644859813085], ['16', 16.796296296296298], ['23', 7.985294117647059], ['12', 9.41095890410959], ['17', 11.46], ['15', 38.5948275862069], ['21', 16.009174311926607], ['20', 21.525], ['02', 23.810344827586206], ['18', 13.20183486238532], ['03', 7.796296296296297], ['05', 10.08695652173913], ['19', 10.8], ['01', 11.383333333333333], ['22', 6.746478873239437], ['08', 10.25], ['04', 7.170212765957447], ['00', 8.127272727272727], ['06', 9.022727272727273], ['07', 7.852941176470588], ['11', 11.051724137931034]]
# Flip each [hour, average] pair to [average, hour] so that sorting the list
# orders the entries by average first.
swap_avg_by_hour = [[average, hour] for hour, average in avg_by_hour]
swap_avg_by_hour
[[5.5777777777777775, '09'], [14.741176470588234, '13'], [13.440677966101696, '10'], [13.233644859813085, '14'], [16.796296296296298, '16'], [7.985294117647059, '23'], [9.41095890410959, '12'], [11.46, '17'], [38.5948275862069, '15'], [16.009174311926607, '21'], [21.525, '20'], [23.810344827586206, '02'], [13.20183486238532, '18'], [7.796296296296297, '03'], [10.08695652173913, '05'], [10.8, '19'], [11.383333333333333, '01'], [6.746478873239437, '22'], [10.25, '08'], [7.170212765957447, '04'], [8.127272727272727, '00'], [9.022727272727273, '06'], [7.852941176470588, '07'], [11.051724137931034, '11']]
# Order the [average, hour] pairs from highest average to lowest, leaving
# swap_avg_by_hour itself untouched.
sorted_swap = list(swap_avg_by_hour)
sorted_swap.sort(reverse=True)
sorted_swap
[[38.5948275862069, '15'], [23.810344827586206, '02'], [21.525, '20'], [16.796296296296298, '16'], [16.009174311926607, '21'], [14.741176470588234, '13'], [13.440677966101696, '10'], [13.233644859813085, '14'], [13.20183486238532, '18'], [11.46, '17'], [11.383333333333333, '01'], [11.051724137931034, '11'], [10.8, '19'], [10.25, '08'], [10.08695652173913, '05'], [9.41095890410959, '12'], [9.022727272727273, '06'], [8.127272727272727, '00'], [7.985294117647059, '23'], [7.852941176470588, '07'], [7.796296296296297, '03'], [7.170212765957447, '04'], [6.746478873239437, '22'], [5.5777777777777775, '09']]
# Print the five hours with the highest average comments per post,
# rendering each hour string as HH:MM.
print("Top 5 Hours for Ask Posts Comments")
template = "{} : {:.2f} average comments per post"
for avg, hr in sorted_swap[:5]:
    hour_label = dt.datetime.strptime(hr, "%H").strftime("%H:%M")
    print(template.format(hour_label, avg))
Top 5 Hours for Ask Posts Comments 15:00 : 38.59 average comments per post 02:00 : 23.81 average comments per post 20:00 : 21.52 average comments per post 16:00 : 16.80 average comments per post 21:00 : 16.01 average comments per post
The hour with the highest average number of comments is 15:00 (3:00pm WAT), with 38.59 average comments per post, which is about 60% more than the hour with the second-highest average.
Therefore, the hour in which you should create a post to have a high chance of receiving comments is from 15:00 to 16:00 (i.e. 3:00pm - 4:00pm).
From the data,