We will compute the average number of comments for Ask HN and Show HN posts and compare them.
# Opening and reading a file
from csv import reader
# Load the whole CSV into memory. The context manager closes the file handle
# once the rows have been read (it was previously left open), and newline=''
# is what the csv module asks for so that newlines embedded inside quoted
# fields are parsed correctly.
with open('hacker_news.csv', newline='') as opened_file:
    read_file = reader(opened_file)
    hn = list(read_file)

# headers is deliberately a one-element list of rows (a slice, not hn[0]);
# hn keeps only the data rows that follow the header.
headers = hn[:1]
hn = hn[1:]
# Split the posts into three groups by title prefix (case-insensitive):
# titles starting with "ask hn", titles starting with "show hn", and the rest.
ask_posts, show_posts, other_posts = [], [], []

for row in hn:
    title = row[1]
    lowered = title.lower()
    if lowered.startswith('ask hn'):
        target = ask_posts
    elif lowered.startswith('show hn'):
        target = show_posts
    else:
        target = other_posts
    target.append(row)

# Quick sanity check: the header row and the first three Ask HN posts.
print(headers)
print(ask_posts[:3])
[['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at']] [['12296411', 'Ask HN: How to improve my personal website?', '', '2', '6', 'ahmedbaracat', '8/16/2016 9:55'], ['10610020', 'Ask HN: Am I the only one outraged by Twitter shutting down share counts?', '', '28', '29', 'tkfx', '11/22/2015 13:43'], ['11610310', 'Ask HN: Aby recent changes to CSS that broke mobile?', '', '1', '1', 'polskibus', '5/2/2016 10:14']]
# Average number of comments per post for Ask HN and Show HN posts.
# Column 4 of each row holds num_comments as text, hence the int() conversion.

# Ask HN: total first, then the mean. The division is done once, after the
# total is known, and is guarded so an empty group yields 0 instead of
# raising ZeroDivisionError.
total_ask_comments = sum(int(row[4]) for row in ask_posts)
avg_ask_comments = total_ask_comments / len(ask_posts) if ask_posts else 0

# Show HN: same computation.
total_show_comments = sum(int(row[4]) for row in show_posts)
avg_show_comments = total_show_comments / len(show_posts) if show_posts else 0

print(round(avg_ask_comments, 2))
print(round(avg_show_comments, 2))
14.04 10.32
Ask posts receive more comments on average than Show posts.
import datetime as dt
# Pair each Ask HN post's creation timestamp (column 6) with its comment
# count (column 4, converted to int).
result_list = [[row[6], int(row[4])] for row in ask_posts]

# Per hour of day (zero-padded two-digit string from '%H'):
#   counts_by_hour   -> number of Ask HN posts created in that hour
#   comments_by_hour -> total comments those posts received
counts_by_hour = {}
comments_by_hour = {}
for date, comment in result_list:
    hour = dt.datetime.strptime(date, '%m/%d/%Y %H:%M').strftime('%H')
    counts_by_hour[hour] = counts_by_hour.get(hour, 0) + 1
    comments_by_hour[hour] = comments_by_hour.get(hour, 0) + comment
{'09': 45, '13': 85, '10': 59, '14': 107, '16': 108, '23': 68, '12': 73, '17': 100, '15': 116, '21': 109, '20': 80, '02': 58, '18': 109, '03': 54, '05': 46, '19': 110, '01': 60, '22': 71, '08': 48, '04': 47, '00': 55, '06': 44, '07': 34, '11': 58}
# Average comments per Ask HN post for every hour, as [hour, average] pairs.
avg_by_hour = [
    [hr, total / counts_by_hour[hr]]
    for hr, total in comments_by_hour.items()
]
# Bare expression kept as-is: presumably a notebook-cell preview of the first
# five pairs; it has no effect when run as a plain script.
avg_by_hour[:5]
# Put the average first in each pair so that a plain sort orders by average
# (ties fall back to the hour string).
swap_avg_by_hour = [[avg, hr] for hr, avg in avg_by_hour]
sorted_swap = sorted(swap_avg_by_hour, reverse=True)

print("Top 5 Hours for Ask Posts Comments")
for avg, hr in sorted_swap[:5]:
    # Render the two-digit hour as HH:MM, e.g. '15' -> '15:00'.
    hour_label = dt.datetime.strptime(hr, '%H').strftime('%H:%M')
    formatted = "{}: {:.2f} average comments per post".format(hour_label, avg)
    print(formatted)
# Total points (column 3, stored as text) for Show HN and Ask HN posts.
total_show_points = sum(int(row[3]) for row in show_posts)
total_ask_points = sum(int(row[3]) for row in ask_posts)

# Report the per-post mean for each group, rounded to two decimals.
print("Average point for show posts is: ",
      round(total_show_points / len(show_posts), 2))
print("Average point for ask posts is: ",
      round(total_ask_points / len(ask_posts), 2))
Average point for show posts is: 27.56 Average point for ask posts is: 15.06
import datetime as dt
def _sum_points_by_hour(posts):
    """Return total points per creation hour for the given posts.

    Args:
        posts: rows where index 3 is the point count (numeric text) and
            index 6 is the creation time formatted as '%m/%d/%Y %H:%M'.

    Returns:
        A dict mapping the zero-padded hour string ('00'..'23') to the sum
        of points of all posts created in that hour.

    Raises:
        ValueError: if a timestamp does not match the expected format or a
            point count is not an integer string.
    """
    totals = {}
    for row in posts:
        hour = dt.datetime.strptime(row[6], "%m/%d/%Y %H:%M").strftime('%H')
        totals[hour] = totals.get(hour, 0) + int(row[3])
    return totals


# The same aggregation is applied to both groups through one helper instead
# of two copy-pasted loops.
ask_point_by_hour = _sum_points_by_hour(ask_posts)
show_point_by_hour = _sum_points_by_hour(show_posts)
# Turn each {hour: points} mapping into [points, hour] pairs so that sorting
# the pairs orders them by points.
swap_ask_point = [[points, hour] for hour, points in ask_point_by_hour.items()]
swap_show_point = [[points, hour] for hour, points in show_point_by_hour.items()]

# Highest point totals first.
sorted_ask_point = sorted(swap_ask_point, reverse=True)
sorted_show_point = sorted(swap_show_point, reverse=True)
# Print the five highest-scoring hours for each post type so the two
# rankings can be compared side by side.
for point, hr in sorted_ask_point[:5]:
    hour_label = dt.datetime.strptime(hr, "%H").strftime("%H:%M")
    print("In {} total Ask post points are {}".format(hour_label, point))

# Visual separator between the two rankings.
print('\n')

for point, hr in sorted_show_point[:5]:
    hour_label = dt.datetime.strptime(hr, "%H").strftime("%H:%M")
    print("In {} total Show post points are {}".format(hour_label, point))
In 15:00 total Ask post points are 3479 In 16:00 total Ask post points are 2522 In 13:00 total Ask post points are 2062 In 17:00 total Ask post points are 1941 In 18:00 total Ask post points are 1741 In 16:00 total Show post points are 2634 In 12:00 total Show post points are 2543 In 17:00 total Show post points are 2521 In 13:00 total Show post points are 2438 In 15:00 total Show post points are 2228
The results show that Ask posts collect the most points between 15:00 and 16:00, while for Show posts the best hours are 15:00-17:00. In summary, the results make it clear that the best hours are almost the same for both Ask and Show posts, which suggests posting during these hours.