In [1]:
import pandas as pd
In [2]:
# Load the full Titanic passenger table from the CSV file.
titanic_df = pd.read_csv('titanic.csv')

# The file is big, so pull out a small sample to work with:
# the rows labelled 0 through 5 and four of the columns.
row_labels = [0, 1, 2, 3, 4, 5]
column_labels = ['name', 'sex', 'age', 'fare']
df = titanic_df.loc[row_labels, column_labels]
df
Out[2]:
name sex age fare
0 Allen, Miss. Elisabeth Walton female 29.0000 211.3375
1 Allison, Master. Hudson Trevor male 0.9167 151.5500
2 Allison, Miss. Helen Loraine female 2.0000 151.5500
3 Allison, Mr. Hudson Joshua Creighton male 30.0000 151.5500
4 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female 25.0000 151.5500
5 Anderson, Mr. Harry male 48.0000 26.5500
In [3]:
# Show only the 'name' column; selecting one column yields a Series.
df['name']
Out[3]:
0                      Allen, Miss. Elisabeth Walton
1                     Allison, Master. Hudson Trevor
2                       Allison, Miss. Helen Loraine
3               Allison, Mr. Hudson Joshua Creighton
4    Allison, Mrs. Hudson J C (Bessie Waldo Daniels)
5                                Anderson, Mr. Harry
Name: name, dtype: object
In [4]:
# Label-based selection with .loc: every row (:), the 'name' column.
df.loc[:, 'name']
Out[4]:
0                      Allen, Miss. Elisabeth Walton
1                     Allison, Master. Hudson Trevor
2                       Allison, Miss. Helen Loraine
3               Allison, Mr. Hudson Joshua Creighton
4    Allison, Mrs. Hudson J C (Bessie Waldo Daniels)
5                                Anderson, Mr. Harry
Name: name, dtype: object
In [5]:
# .iloc selects by integer position instead of by label.
# Take the first two rows; all columns are kept.
df.iloc[:2]
Out[5]:
name sex age fare
0 Allen, Miss. Elisabeth Walton female 29.0000 211.3375
1 Allison, Master. Hudson Trevor male 0.9167 151.5500
In [6]:
# Every row, but only the last column (position -1 counts from the end).
df.iloc[:, -1]
Out[6]:
0    211.3375
1    151.5500
2    151.5500
3    151.5500
4    151.5500
5     26.5500
Name: fare, dtype: float64
In [7]:
# Display the row labels (the index) of the sample frame.
df.index
Out[7]:
Int64Index([0, 1, 2, 3, 4, 5], dtype='int64')
In [8]:
# Filter with a boolean mask: keep only rows whose 'sex' is 'female'.
is_female = df['sex'] == 'female'
df[is_female]
Out[8]:
name sex age fare
0 Allen, Miss. Elisabeth Walton female 29.0 211.3375
2 Allison, Miss. Helen Loraine female 2.0 151.5500
4 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female 25.0 151.5500
In [9]:
# A boolean mask built from the index keeps only the row labelled 2.
label_is_two = df.index == 2
df[label_is_two]
Out[9]:
name sex age fare
2 Allison, Miss. Helen Loraine female 2.0 151.55
In [10]:
# Conditions can be combined with & (element-wise AND).
# Keep rows that are female and have age 20 or above.
is_female = df['sex'] == 'female'
aged_20_plus = df['age'] >= 20
df[is_female & aged_20_plus]
Out[10]:
name sex age fare
0 Allen, Miss. Elisabeth Walton female 29.0 211.3375
4 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female 25.0 151.5500