In this project, we'll analyze the Hacker News dataset, which contains the top news posts from user-submitted data.
The aim of this project is to analyze the Hacker News posts dataset, which is provided as a CSV file.
from csv import reader
# Load the Hacker News CSV into memory as a list of rows (each row is a list
# of strings). The `with` block guarantees the file handle is closed once the
# rows have been read, and newline="" is what the csv module documents for
# file objects handed to reader().
# NOTE(review): encoding="utf-8" assumes the CSV export is UTF-8 -- confirm.
with open("hacker_news.csv", newline="", encoding="utf-8") as opened_file:
    read_file = reader(opened_file)
    hn = list(read_file)

print("First five rows of hacker news dataset is\n\n", hn[0:5])

# The first row is treated as the header; everything after it is data.
headers = hn[0]
hn = hn[1:]

print("Header of the hacker news dataset is \n\n", headers)
# Print only the first five data rows, as the message promises, instead of
# dumping the entire dataset to the output.
print("First five rows of the dataset without header \n\n", hn[:5])
NameError                                 Traceback (most recent call last)
<ipython-input-1-d3e91822a63a> in <module>()
----> 1 headers=hn[0]
      2 hn=hn[1:]
      3
      4 print("Header of the hacker news dataset is \n\n",headers)
      5 print("First five rows of the dataset without header \n\n",hn)

NameError: name 'hn' is not defined
# Split the posts in `hn` into three buckets according to how the
# lower-cased title (column index 1 of each row) begins.
ask_posts, show_posts, other_posts = [], [], []

for post in hn:
    lowered_title = post[1].lower()

    # Choose the destination list first, then append exactly once.
    if lowered_title.startswith('ask hn'):
        bucket = ask_posts
    elif lowered_title.startswith('show hn'):
        bucket = show_posts
    else:
        bucket = other_posts
    bucket.append(post)

# Report how many posts landed in each bucket.
print('\nTotal number of ask hn posts are :', len(ask_posts))
print('\nTotal number of show hn posts are :', len(show_posts))
print('\nTotal number of posts not either ask hn or show hn are :', len(other_posts))