import pandas as pd
# Load the Titanic passenger data from the CSV file
titanic_df = pd.read_csv('titanic.csv')
# The full table is large, so keep just the first six rows
# and the four columns used in the examples below
df = titanic_df.loc[
    [0, 1, 2, 3, 4, 5],
    ['name', 'sex', 'age', 'fare'],
]
df
| | name | sex | age | fare |
---|---|---|---|---|
0 | Allen, Miss. Elisabeth Walton | female | 29.0000 | 211.3375 |
1 | Allison, Master. Hudson Trevor | male | 0.9167 | 151.5500 |
2 | Allison, Miss. Helen Loraine | female | 2.0000 | 151.5500 |
3 | Allison, Mr. Hudson Joshua Creighton | male | 30.0000 | 151.5500 |
4 | Allison, Mrs. Hudson J C (Bessie Waldo Daniels) | female | 25.0000 | 151.5500 |
5 | Anderson, Mr. Harry | male | 48.0000 | 26.5500 |
# Select only the name column
df['name']
0    Allen, Miss. Elisabeth Walton
1    Allison, Master. Hudson Trevor
2    Allison, Miss. Helen Loraine
3    Allison, Mr. Hudson Joshua Creighton
4    Allison, Mrs. Hudson J C (Bessie Waldo Daniels)
5    Anderson, Mr. Harry
Name: name, dtype: object
# The same column can also be selected with loc: all rows, 'name' column
df.loc[:, 'name']
0    Allen, Miss. Elisabeth Walton
1    Allison, Master. Hudson Trevor
2    Allison, Miss. Helen Loraine
3    Allison, Mr. Hudson Joshua Creighton
4    Allison, Mrs. Hudson J C (Bessie Waldo Daniels)
5    Anderson, Mr. Harry
Name: name, dtype: object
# iloc selects by integer position rather than by label:
# here, the first two rows and every column
df.iloc[:2, :]
| | name | sex | age | fare |
---|---|---|---|---|
0 | Allen, Miss. Elisabeth Walton | female | 29.0000 | 211.3375 |
1 | Allison, Master. Hudson Trevor | male | 0.9167 | 151.5500 |
# Every row, last column only (negative positions count from the end)
df.iloc[:, -1]
0    211.3375
1    151.5500
2    151.5500
3    151.5500
4    151.5500
5     26.5500
Name: fare, dtype: float64
# Show the row labels (the index) of the DataFrame
df.index
Int64Index([0, 1, 2, 3, 4, 5], dtype='int64')
# Filter rows with a boolean condition: keep only the female passengers
df[df['sex'] == 'female']
| | name | sex | age | fare |
---|---|---|---|---|
0 | Allen, Miss. Elisabeth Walton | female | 29.0 | 211.3375 |
2 | Allison, Miss. Helen Loraine | female | 2.0 | 151.5500 |
4 | Allison, Mrs. Hudson J C (Bessie Waldo Daniels) | female | 25.0 | 151.5500 |
# A condition on the index works the same way: keep the row labelled 2
df.loc[df.index == 2]
| | name | sex | age | fare |
---|---|---|---|---|
2 | Allison, Miss. Helen Loraine | female | 2.0 | 151.55 |
# Conditions can be combined with & (each one wrapped in parentheses):
# female passengers aged 20 or older
df[(df['sex'] == 'female') & (df['age'] >= 20)]
| | name | sex | age | fare |
---|---|---|---|---|
0 | Allen, Miss. Elisabeth Walton | female | 29.0 | 211.3375 |
4 | Allison, Mrs. Hudson J C (Bessie Waldo Daniels) | female | 25.0 | 151.5500 |