In [12]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

s = pd.Series([1,3,5,np.nan,6,8])
s

Out[12]:

0     1
1     3
2     5
3   NaN
4     6
5     8
dtype: float64

In [13]:

# Cteate a DataFrame
dates = pd.date_range('20130101', periods=6)
# dates 
df = pd.DataFrame(np.random.randn(6,4),index = dates,columns=list('ABCD'))
df

Out[13]:

	A	B	C	D
2013-01-01	0.709714	-0.570070	-0.550845	-1.057212
2013-01-02	-0.628737	0.391646	-1.091479	0.537669
2013-01-03	-0.294052	-1.296279	-0.912759	0.441580
2013-01-04	0.528582	0.532051	-1.274615	0.146372
2013-01-05	0.501640	1.164900	2.032659	0.443303
2013-01-06	1.862346	2.191615	-0.349397	-0.309474

In [14]:

df.dtypes

Out[14]:

A    float64
B    float64
C    float64
D    float64
dtype: object

In [15]:

df.head(2)

Out[15]:

	A	B	C	D
2013-01-01	0.709714	-0.570070	-0.550845	-1.057212
2013-01-02	-0.628737	0.391646	-1.091479	0.537669

In [16]:

df.tail(2)

Out[16]:

	A	B	C	D
2013-01-05	0.501640	1.164900	2.032659	0.443303
2013-01-06	1.862346	2.191615	-0.349397	-0.309474

In [20]:

# Display the index, columns, and the underlying numpy data
df.index
df.columns
df.values

Out[20]:

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [17]:

# 查看描述性统计
df.describe()

Out[17]:

	A	B	C	D
count	6.000000	6.000000	6.000000	6.000000
mean	0.446582	0.402311	-0.357739	0.033706
std	0.870429	1.235381	1.219708	0.617847
min	-0.628737	-1.296279	-1.274615	-1.057212
25%	-0.095129	-0.329641	-1.046799	-0.195513
50%	0.515111	0.461849	-0.731802	0.293976
75%	0.664431	1.006688	-0.399759	0.442872
max	1.862346	2.191615	2.032659	0.537669

In [19]:

s = pd.Series([1,3,5,np.nan,6,8], index=dates).shift(2)
s

Out[19]:

2013-01-01   NaN
2013-01-02   NaN
2013-01-03     1
2013-01-04     3
2013-01-05     5
2013-01-06   NaN
Freq: D, dtype: float64

In [21]:

# Return cumulative sum over requested axis.
df.apply(np.cumsum)

Out[21]:

	A	B	C	D
2013-01-01	0.709714	-0.570070	-0.550845	-1.057212
2013-01-02	0.080977	-0.178424	-1.642323	-0.519543
2013-01-03	-0.213076	-1.474703	-2.555082	-0.077962
2013-01-04	0.315507	-0.942652	-3.829697	0.068409
2013-01-05	0.817147	0.222248	-1.797039	0.511712
2013-01-06	2.679493	2.413864	-2.146435	0.202238

In [30]:

# Concat function
piece = [df[:2],df[2:3],df[3:1]]
pd.concat(piece)

Out[30]:

	A	B	C	D
2013-01-01	0.709714	-0.570070	-0.550845	-1.057212
2013-01-02	-0.628737	0.391646	-1.091479	0.537669
2013-01-03	-0.294052	-1.296279	-0.912759	0.441580

In [29]:

# Merge function
left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]})
right = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [4, 5]})
pd.merge(left,right,on='key')

Out[29]:

	key	lval_x	lval_y
0	foo	1	4
1	foo	1	5
2	foo	2	4
3	foo	2	5

In [38]:

# Append function
df = pd.DataFrame(np.random.randn(8, 4), columns=['A','B','C','D'])
s = df.iloc[3]
print(df)
print(s)
df.append(s, ignore_index=True)

          A         B         C         D
0  0.307377  0.587502 -1.498826 -1.413681
1  0.606972 -0.949604  0.330130  0.221957
2 -0.224912 -1.682801  0.376430  0.132976
3  0.177087  1.054522  1.107316  0.857462
4 -1.378083  0.585530  0.877582 -1.344324
5 -0.288912  0.378269 -0.178293  1.931936
6 -0.196315 -0.300350  2.258136  0.007789
7 -0.863666  0.695809 -2.187412 -0.436914
A    0.177087
B    1.054522
C    1.107316
D    0.857462
Name: 3, dtype: float64

Out[38]:

	A	B	C	D
0	0.307377	0.587502	-1.498826	-1.413681
1	0.606972	-0.949604	0.330130	0.221957
2	-0.224912	-1.682801	0.376430	0.132976
3	0.177087	1.054522	1.107316	0.857462
4	-1.378083	0.585530	0.877582	-1.344324
5	-0.288912	0.378269	-0.178293	1.931936
6	-0.196315	-0.300350	2.258136	0.007789
7	-0.863666	0.695809	-2.187412	-0.436914
8	0.177087	1.054522	1.107316	0.857462

In [39]:

df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
						'foo', 'bar', 'foo', 'foo'],
						'B' : ['one', 'one', 'two', 'three',
						'two', 'two', 'one', 'three'],
						'C' : np.random.randn(8),
						'D' : np.random.randn(8)})
# Grouping and then applying a function sum to the resulting groups.

print(df)
df.groupby('A').sum()

     A      B         C         D
0  foo    one -1.010950 -1.443440
1  bar    one -1.023700  0.052766
2  foo    two -0.642614 -2.438775
3  bar  three -0.442711  0.525121
4  foo    two -1.176690 -0.230537
5  bar    two -0.771803 -0.347051
6  foo    one -1.313567 -1.211388
7  foo  three  0.779921 -1.279009

Out[39]:

	C	D
A
bar	-2.238214	0.230836
foo	-3.363901	-6.603149

In [44]:

# Time Series 
rng = pd.date_range('1/1/2012', periods=100, freq='S')
# print(rng)
ts = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
# print(ts)
ts.resample('5Min').sum()

Out[44]:

263.22

In [45]:

# ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000))
ts=pd.Series(np.random.randn(1000),index=pd.date_range('1/1/2000',periods=1000))
ts = ts.cumsum()
ts.plot()

/Users/ulson_hu/anaconda/lib/python2.7/site-packages/matplotlib/__init__.py:830: MatplotlibDeprecationWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.
  mplDeprecation)

Out[45]:

<matplotlib.axes._subplots.AxesSubplot at 0x11262d250>