In [2]:
import pandas as pd
In [3]:
# Load the Titanic passenger data from the CSV file in the working directory.
titanic_df = pd.read_csv('titanic.csv')

# The file is big, so keep a small slice for the examples: row labels 0-5 and
# the four columns listed below.
# .copy() makes df an explicitly independent frame: later cells add columns to
# df, and those assignments must never be tied to (or warn about) titanic_df.
df = titanic_df.loc[[0, 1, 2, 3, 4, 5], ['name', 'sex', 'age', 'fare']].copy()
df
Out[3]:
name sex age fare
0 Allen, Miss. Elisabeth Walton female 29.0000 211.3375
1 Allison, Master. Hudson Trevor male 0.9167 151.5500
2 Allison, Miss. Helen Loraine female 2.0000 151.5500
3 Allison, Mr. Hudson Joshua Creighton male 30.0000 151.5500
4 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female 25.0000 151.5500
5 Anderson, Mr. Harry male 48.0000 26.5500
In [10]:
# Size of the DataFrame as a (number_of_rows, number_of_columns) tuple
df.shape
Out[10]:
(6, 4)
In [11]:
# Transposing a DataFrame: the row labels become the column labels and the
# column labels become the row labels. df itself is not modified.
df.T
Out[11]:
0 1 2 3 4 5
name Allen, Miss. Elisabeth Walton Allison, Master. Hudson Trevor Allison, Miss. Helen Loraine Allison, Mr. Hudson Joshua Creighton Allison, Mrs. Hudson J C (Bessie Waldo Daniels) Anderson, Mr. Harry
sex female male female male female male
age 29 0.9167 2 30 25 48
fare 211.338 151.55 151.55 151.55 151.55 26.55
In [7]:
# Dropping a column.
# columns=[...] names the column labels to remove; it is equivalent to passing
# the labels together with axis=1. The result is a new DataFrame.
df.drop(columns=['age'])
Out[7]:
name sex fare
0 Allen, Miss. Elisabeth Walton female 211.3375
1 Allison, Master. Hudson Trevor male 151.5500
2 Allison, Miss. Helen Loraine female 151.5500
3 Allison, Mr. Hudson Joshua Creighton male 151.5500
4 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female 151.5500
5 Anderson, Mr. Harry male 26.5500
In [8]:
# drop() was called without inplace=True, so it returned a new DataFrame and
# the original df remains the same (the 'age' column is still present).
df
Out[8]:
name sex age fare
0 Allen, Miss. Elisabeth Walton female 29.0000 211.3375
1 Allison, Master. Hudson Trevor male 0.9167 151.5500
2 Allison, Miss. Helen Loraine female 2.0000 151.5500
3 Allison, Mr. Hudson Joshua Creighton male 30.0000 151.5500
4 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female 25.0000 151.5500
5 Anderson, Mr. Harry male 48.0000 26.5500
In [9]:
# Dropping a row.
# index=[...] names the row labels to remove; it is equivalent to passing the
# labels together with axis=0. The result is a new DataFrame.
df.drop(index=[1])
Out[9]:
name sex age fare
0 Allen, Miss. Elisabeth Walton female 29.0 211.3375
2 Allison, Miss. Helen Loraine female 2.0 151.5500
3 Allison, Mr. Hudson Joshua Creighton male 30.0 151.5500
4 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female 25.0 151.5500
5 Anderson, Mr. Harry male 48.0 26.5500
In [14]:
# Scalar addition: the scalar 10 is added to every value of the 'age' column.
# Series.add(10) is the method form of `df['age'] + 10`.
df['big_age'] = df['age'].add(10)
df
Out[14]:
name sex age fare big_age
0 Allen, Miss. Elisabeth Walton female 29.0000 211.3375 39.0000
1 Allison, Master. Hudson Trevor male 0.9167 151.5500 10.9167
2 Allison, Miss. Helen Loraine female 2.0000 151.5500 12.0000
3 Allison, Mr. Hudson Joshua Creighton male 30.0000 151.5500 40.0000
4 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female 25.0000 151.5500 35.0000
5 Anderson, Mr. Harry male 48.0000 26.5500 58.0000
In [17]:
# Two columns can be added together.
# The addition is element-wise, row by row (matched on the index), much like
# adding two matrices of the same shape.
# Series.add(other) is the method form of `df['age'] + df['big_age']`.
df['bigger_age'] = df['age'].add(df['big_age'])
df
Out[17]:
name sex age fare big_age bigger_age
0 Allen, Miss. Elisabeth Walton female 29.0000 211.3375 39.0000 68.0000
1 Allison, Master. Hudson Trevor male 0.9167 151.5500 10.9167 11.8334
2 Allison, Miss. Helen Loraine female 2.0000 151.5500 12.0000 14.0000
3 Allison, Mr. Hudson Joshua Creighton male 30.0000 151.5500 40.0000 70.0000
4 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female 25.0000 151.5500 35.0000 60.0000
5 Anderson, Mr. Harry male 48.0000 26.5500 58.0000 106.0000
In [19]:
# Scalar multiplication: every value of the 'age' column is multiplied by 10.
# Series.mul(10) is the method form of `df['age'] * 10`.
df['biggest_age'] = df['age'].mul(10)
df
Out[19]:
name sex age fare big_age bigger_age biggest_age
0 Allen, Miss. Elisabeth Walton female 29.0000 211.3375 39.0000 68.0000 290.000
1 Allison, Master. Hudson Trevor male 0.9167 151.5500 10.9167 11.8334 9.167
2 Allison, Miss. Helen Loraine female 2.0000 151.5500 12.0000 14.0000 20.000
3 Allison, Mr. Hudson Joshua Creighton male 30.0000 151.5500 40.0000 70.0000 300.000
4 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female 25.0000 151.5500 35.0000 60.0000 250.000
5 Anderson, Mr. Harry male 48.0000 26.5500 58.0000 106.0000 480.000