#!/usr/bin/env python
# coding: utf-8

# # Pandas_QuickStart
# Originally from http://pandas.pydata.org/pandas-docs/stable/
# by [openthings@163.com](http://my.oschina.net/u/2306127/blog?catalog=3420733), 2016-04.
#
# NOTE: this file is a notebook export. The `# In[n]:` markers are the
# original cell numbers. Bare expressions such as `df` were cell outputs in
# the notebook; when run as a plain script they are evaluated but nothing is
# printed.

# ## 6.1 Object Creation

# Creating a Series by passing a list of values, letting pandas create a
# default integer index:

# In[1]:

import pandas as pd
import numpy as np

# matplotlib is not used by any cell shown in this script, so a missing
# installation should not stop the rest of the quick start from running.
try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None

s = pd.Series([1, 3, 5, np.nan, 6, 8])
s

# Creating a DataFrame by passing a numpy array, with a datetime index and
# labeled columns:

# In[2]:

dates = pd.date_range('20130101', periods=6)
dates

# In[7]:

df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
df

# Creating a DataFrame by passing a dict of objects that can be converted to
# series-like.

# In[8]:

df2 = pd.DataFrame({
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(["test", "train", "test", "train"]),
    'F': 'foo',
})
df2

# In[9]:

df2.dtypes

# If you're using IPython, tab completion for column names (as well as public
# attributes) is automatically enabled. Here's a subset of the attributes
# that will be completed:
#
#     In [13]: df2.<TAB>

# In[11]:

# `df2.` followed by <TAB> only makes sense in an interactive session; as a
# statement it is a syntax error, so the cell is kept as a comment.
# df2.

# As you can see, the columns A, B, C, and D are automatically tab completed.
# E is there as well; the rest of the attributes have been truncated for
# brevity.

# ## 6.2 Viewing Data

# In[14]:

df.head()

# In[15]:

df.tail(3)

# In[16]:

df.index

# In[17]:

df.values

# In[18]:

df.describe()

# In[19]:

df.T

# In[20]:

df.sort_index(axis=1, ascending=False)

# In[21]:

df.sort_values(by='B')

# ## 6.3 Selection

# Getting

# In[22]:

df['A']

# In[23]:

df[0:3]

# In[24]:

df['20130102':'20130104']

# ### 6.3.2 Selection by Label

# For getting a cross section using a label

# In[25]:

df.loc[dates[0]]

# Selecting on a multi-axis by label

# In[26]:

df.loc[:, ['A', 'B']]

# Showing label slicing, both endpoints are included

# In[27]:

df.loc['20130102':'20130104', ['A', 'B']]

# Reduction in the dimensions of the returned object

# In[30]:

df.loc['20130102', ['A', 'B']]

# For getting a scalar value

# In[31]:

df.loc[dates[0], 'A']

# For getting fast access to a scalar (equiv to the prior method)

# In[32]:

df.at[dates[0], 'A']

# ### 6.3.3 Selection by Position

# See more in Selection by Position

# Select via the position of the passed integers

# In[33]:

df.iloc[3]

# By integer slices, acting similar to numpy/python

# In[34]:

df.iloc[3:5, 0:2]

# By lists of integer position locations, similar to the numpy/python style

# In[35]:

df.iloc[[1, 2, 4], [0, 2]]

# For slicing rows explicitly

# In[36]:

df.iloc[1:3, :]

# For slicing columns explicitly

# In[37]:

df.iloc[:, 1:3]

# For getting a value explicitly

# In[39]:

df.iloc[1, 1]

# For getting fast access to a scalar (equiv to the prior method)

# In[40]:

df.iat[1, 1]

# ### 6.3.4 Boolean Indexing

# Using a single column's values to select data.

# In[41]:

df[df.A > 0]

# A where operation for getting.

# In[42]:

df[df > 0]

# #### Using the isin() method for filtering:

# In[43]:

df2 = df.copy()

# Add a column.

# In[44]:

df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']

# In[45]:

df2

# In[46]:

df2[df2['E'].isin(['two', 'four'])]

# ### 6.3.5 Setting

# Build a Series whose date index starts one day after `dates`. Setting it as
# a new column would align it with df by index; this script only creates the
# Series and does not assign it to df.

# In[48]:

s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130102', periods=6))
s1

# Setting values by position

# In[49]:

df.iat[0, 1] = 0

# Setting by assigning with a numpy array

# In[50]:

df.loc[:, 'D'] = np.array([5] * len(df))

# The result of the prior setting operations

# In[51]:

df

# A where operation with setting: every positive entry is replaced by its
# negation, so no positive values remain in df2.

# In[52]:

df2 = df.copy()

# In[53]:

df2[df2 > 0] = -df2

# In[54]:

df2

# In[ ]: