Exploring Hacker News Posts: Ask HN vs. Show HN Comments by Hour
from csv import reader
# Load every row of the Hacker News CSV into a list of lists.
# The `with` block guarantees the file handle is closed once the rows are
# read, and newline='' lets the csv module handle line endings that occur
# inside quoted fields, as the csv documentation recommends.
with open('hacker_news.csv', newline='') as open_file:
    read_file = reader(open_file)
    hn = list(read_file)

# Preview the header row plus the first four data rows.
print(hn[:5])
[['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at'], ['12224879', 'Interactive Dynamic Video', 'http://www.interactivedynamicvideo.com/', '386', '52', 'ne0phyte', '8/4/2016 11:52'], ['10975351', 'How to Use Open Source and Shut the Fuck Up at the Same Time', 'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/', '39', '10', 'josep2', '1/26/2016 19:30'], ['11964716', "Florida DJs May Face Felony for April Fools' Water Joke", 'http://www.thewire.com/entertainment/2013/04/florida-djs-april-fools-water-joke/63798/', '2', '1', 'vezycash', '6/23/2016 22:20'], ['11919867', 'Technology ventures: From Idea to Enterprise', 'https://www.amazon.com/Technology-Ventures-Enterprise-Thomas-Byers/dp/0073523429', '3', '1', 'hswarna', '6/17/2016 0:01']]
# Peel the header row off the data set; `hn` keeps only the data rows.
headers, hn = hn[0], hn[1:]

# Sort every post into one of three buckets based on how its title
# (column 1) begins, ignoring case.
ask_posts, show_posts, other_posts = [], [], []
for row in hn:
    lowered_title = row[1].lower()
    if lowered_title.startswith('ask hn'):
        bucket = ask_posts
    elif lowered_title.startswith('show hn'):
        bucket = show_posts
    else:
        bucket = other_posts
    bucket.append(row)
# Average number of comments per Ask HN post.
# Column 4 holds the comment count as a string, so it is converted to int.
total_ask_comments = sum(int(post[4]) for post in ask_posts)
avg_ask_comments = total_ask_comments / len(ask_posts)
print("The average comments for Ask is", avg_ask_comments)

# Average number of comments per Show HN post, computed the same way.
total_show_comments = sum(int(post[4]) for post in show_posts)
avg_show_comments = total_show_comments / len(show_posts)
# The label previously said "Ask" here even though the value printed is the
# Show HN average; it now names the correct category.
print("The average comments for Show is", avg_show_comments)
the average comments for Ask is 14.038417431192661 the average comments for Ask is 10.31669535283993
The code above shows that Ask HN posts receive more comments on average than Show HN posts.
import datetime as dt
# For each Ask HN post, keep only its creation timestamp (column 6) and its
# comment count (column 4, converted to int) as a two-item list.
result_list = [[post[6], int(post[4])] for post in ask_posts]
print(result_list[:5])
[['8/16/2016 9:55', 6], ['11/22/2015 13:43', 29], ['5/2/2016 10:14', 1], ['8/2/2016 14:20', 3], ['10/15/2015 16:38', 17]]
# Tally, per hour of day, how many Ask HN posts were created and how many
# comments those posts received. Keys are zero-padded hour strings ('00'-'23').
counts_by_hour = {}
comments_by_hour = {}
timestamp_format = '%m/%d/%Y %H:%M'
for created_at, num_comments in result_list:
    hour = dt.datetime.strptime(created_at, timestamp_format).strftime('%H')
    counts_by_hour[hour] = counts_by_hour.get(hour, 0) + 1
    comments_by_hour[hour] = comments_by_hour.get(hour, 0) + num_comments
print(counts_by_hour)
print(comments_by_hour)
{'09': 45, '13': 85, '10': 59, '14': 107, '16': 108, '23': 68, '12': 73, '17': 100, '15': 116, '21': 109, '20': 80, '02': 58, '18': 109, '03': 54, '05': 46, '19': 110, '01': 60, '22': 71, '08': 48, '04': 47, '00': 55, '06': 44, '07': 34, '11': 58} {'09': 251, '13': 1253, '10': 793, '14': 1416, '16': 1814, '23': 543, '12': 687, '17': 1146, '15': 4477, '21': 1745, '20': 1722, '02': 1381, '18': 1439, '03': 421, '05': 464, '19': 1188, '01': 683, '22': 479, '08': 492, '04': 337, '00': 447, '06': 397, '07': 267, '11': 641}
# Build [hour, average comments per post] pairs, rounded to two decimals,
# in the same order the hours appear in counts_by_hour.
avg_cmts_hour = [
    [hour, round(comments_by_hour[hour] / post_count, 2)]
    for hour, post_count in counts_by_hour.items()
]
print(avg_cmts_hour)
[['09', 5.58], ['13', 14.74], ['10', 13.44], ['14', 13.23], ['16', 16.8], ['23', 7.99], ['12', 9.41], ['17', 11.46], ['15', 38.59], ['21', 16.01], ['20', 21.52], ['02', 23.81], ['18', 13.2], ['03', 7.8], ['05', 10.09], ['19', 10.8], ['01', 11.38], ['22', 6.75], ['08', 10.25], ['04', 7.17], ['00', 8.13], ['06', 9.02], ['07', 7.85], ['11', 11.05]]
# Flip each pair to [average, hour] so that sorting orders by the average
# first, then rank the hours from most to fewest comments per post.
swap_avg_by_hour = [[average, hour] for hour, average in avg_cmts_hour]
sorted_swap = sorted(swap_avg_by_hour, reverse=True)
print(sorted_swap[:5])
[[38.59, '15'], [23.81, '02'], [21.52, '20'], [16.8, '16'], [16.01, '21']]
# Print every hour with its average comments per post, highest average first.
for average, hour in sorted_swap:
    # Turn the bare hour string (e.g. '15') into a clock label (e.g. '15:00').
    hour_label = dt.datetime.strptime(hour, '%H').strftime('%H:%M')
    # A space after the colon and a fixed two-decimal average keep the output
    # from reading like an HH:MM:SS timestamp (previously "15:00:38.59").
    print("{}: {:.2f} average comments per post".format(hour_label, average))
15:00:38.59 average comments per post 02:00:23.81 average comments per post 20:00:21.52 average comments per post 16:00:16.8 average comments per post 21:00:16.01 average comments per post 13:00:14.74 average comments per post 10:00:13.44 average comments per post 14:00:13.23 average comments per post 18:00:13.2 average comments per post 17:00:11.46 average comments per post 01:00:11.38 average comments per post 11:00:11.05 average comments per post 19:00:10.8 average comments per post 08:00:10.25 average comments per post 05:00:10.09 average comments per post 12:00:9.41 average comments per post 06:00:9.02 average comments per post 00:00:8.13 average comments per post 23:00:7.99 average comments per post 07:00:7.85 average comments per post 03:00:7.8 average comments per post 04:00:7.17 average comments per post 22:00:6.75 average comments per post 09:00:5.58 average comments per post