#!/usr/bin/env python # coding: utf-8 # In[37]: import re import numpy as np import pandas as pd # In[50]: numlist=["$10000","$20,000","30,000",40000,"50000 "] # In[51]: for i,value in enumerate(numlist): numlist[i]=re.sub(r"([$,])","",str(value)) # In[52]: numlist # In[54]: int(numlist[1]) # In[56]: for i,value in enumerate(numlist): numlist[i]=int(value) # In[57]: numlist # In[58]: np.mean(numlist) # In[59]: numlist2=str(numlist) # In[60]: numlist2.split(None,0) # In[61]: numlist2.split(None,0)[0] # In[62]: titanic =pd.read_csv("https://vincentarelbundock.github.io/Rdatasets/csv/datasets/Titanic.csv") # In[65]: titanic=titanic.drop('Unnamed: 0', 1) # In[66]: titanic.info() # In[67]: titanic.head() # In[69]: a=titanic.iloc[:,1:] b=titanic.iloc[:,1:].values print(type(titanic)) print(type(a)) print(type(b)) # In[70]: a # In[71]: b # In[75]: titanic.columns[1:] # In[76]: titanic.as_matrix(columns=titanic.columns[1:]) # In[81]: data=titanic.as_matrix(columns=titanic.columns[1:]) # In[86]: len(data) # In[91]: range(0,len(data)) # In[92]: g=pd.DataFrame(data=data[0:,0:], # values index=range(0,len(data)), # 1st column as index columns=titanic.columns[1:]) # 1st row as the column names # In[93]: g.head() # In[ ]: