import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Build a Series from a plain list; the np.nan entry marks a missing value
# and the resulting dtype is float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 9])
s
0 1.0 1 3.0 2 5.0 3 NaN 4 6.0 5 9.0 dtype: float64
# Six consecutive daily timestamps starting on 2018-03-13.
dates = pd.date_range(start='20180313', periods=6)
dates
DatetimeIndex(['2018-03-13', '2018-03-14', '2018-03-15', '2018-03-16', '2018-03-17', '2018-03-18'], dtype='datetime64[ns]', freq='D')
# 6x4 frame of standard-normal samples, indexed by `dates`, with columns A-D.
# The generator is not seeded here, so the values differ on every run.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)
df
A | B | C | D | |
---|---|---|---|---|
2018-03-13 | 0.049410 | 1.878310 | -0.147890 | -1.588265 |
2018-03-14 | 0.969644 | 0.951196 | -0.394295 | -0.012396 |
2018-03-15 | -2.533224 | 0.268784 | -0.418247 | 0.760050 |
2018-03-16 | 1.152236 | -0.168060 | 0.655259 | -0.416691 |
2018-03-17 | 0.584280 | -0.349605 | -0.124854 | 0.235189 |
2018-03-18 | 1.785686 | 0.309275 | -1.001553 | -1.055367 |
# Build a frame from a dict of per-column specifications. The scalar entries
# (A, B, F) are repeated for every row; the array-like entries (C, D, E)
# each have four elements, giving a four-row frame.
df2 = pd.DataFrame(
    {
        'A': 1,
        'B': pd.Timestamp('20180313'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.array([3] * 4, dtype='int32'),
        'E': pd.Categorical(['test', 'test2', 'test3', 'test4']),
        'F': 'fool',
    }
)
df2
A | B | C | D | E | F | |
---|---|---|---|---|---|---|
0 | 1 | 2018-03-13 | 1.0 | 3 | test | fool |
1 | 1 | 2018-03-13 | 1.0 | 3 | test2 | fool |
2 | 1 | 2018-03-13 | 1.0 | 3 | test3 | fool |
3 | 1 | 2018-03-13 | 1.0 | 3 | test4 | fool |
df2.dtypes  # per-column dtypes; each column of df2 has its own type
A int64 B datetime64[ns] C float32 D int32 E category F object dtype: object
df  # display the whole frame again for reference
A | B | C | D | |
---|---|---|---|---|
2018-03-13 | 0.049410 | 1.878310 | -0.147890 | -1.588265 |
2018-03-14 | 0.969644 | 0.951196 | -0.394295 | -0.012396 |
2018-03-15 | -2.533224 | 0.268784 | -0.418247 | 0.760050 |
2018-03-16 | 1.152236 | -0.168060 | 0.655259 | -0.416691 |
2018-03-17 | 0.584280 | -0.349605 | -0.124854 | 0.235189 |
2018-03-18 | 1.785686 | 0.309275 | -1.001553 | -1.055367 |
df.head()  # first rows of the frame; with no argument, five rows are shown
A | B | C | D | |
---|---|---|---|---|
2018-03-13 | 0.049410 | 1.878310 | -0.147890 | -1.588265 |
2018-03-14 | 0.969644 | 0.951196 | -0.394295 | -0.012396 |
2018-03-15 | -2.533224 | 0.268784 | -0.418247 | 0.760050 |
2018-03-16 | 1.152236 | -0.168060 | 0.655259 | -0.416691 |
2018-03-17 | 0.584280 | -0.349605 | -0.124854 | 0.235189 |
df.head(2)  # only the first two rows
A | B | C | D | |
---|---|---|---|---|
2018-03-13 | 0.049410 | 1.878310 | -0.147890 | -1.588265 |
2018-03-14 | 0.969644 | 0.951196 | -0.394295 | -0.012396 |
df.tail(3)  # the last three rows
A | B | C | D | |
---|---|---|---|---|
2018-03-16 | 1.152236 | -0.168060 | 0.655259 | -0.416691 |
2018-03-17 | 0.584280 | -0.349605 | -0.124854 | 0.235189 |
2018-03-18 | 1.785686 | 0.309275 | -1.001553 | -1.055367 |
df.index  # row labels: the DatetimeIndex the frame was built with
DatetimeIndex(['2018-03-13', '2018-03-14', '2018-03-15', '2018-03-16', '2018-03-17', '2018-03-18'], dtype='datetime64[ns]', freq='D')
df2.index  # row labels of df2: the integers 0..3
Int64Index([0, 1, 2, 3], dtype='int64')
df.columns  # column labels
Index(['A', 'B', 'C', 'D'], dtype='object')
df.values  # the frame's data as a 2-D NumPy array (labels are dropped)
array([[ 0.04941045, 1.87831022, -0.14789044, -1.58826543], [ 0.96964361, 0.9511957 , -0.39429511, -0.01239569], [-2.53322417, 0.2687836 , -0.41824712, 0.76004998], [ 1.15223636, -0.16805985, 0.65525934, -0.41669137], [ 0.58428023, -0.34960545, -0.1248537 , 0.23518853], [ 1.78568575, 0.30927525, -1.00155318, -1.05536747]])
df2.values  # mixed column types, so the resulting array has dtype=object
array([[1, Timestamp('2018-03-13 00:00:00'), 1.0, 3, 'test', 'fool'], [1, Timestamp('2018-03-13 00:00:00'), 1.0, 3, 'test2', 'fool'], [1, Timestamp('2018-03-13 00:00:00'), 1.0, 3, 'test3', 'fool'], [1, Timestamp('2018-03-13 00:00:00'), 1.0, 3, 'test4', 'fool']], dtype=object)
df.describe()  # per-column summary: count, mean, std, min, quartiles, max
A | B | C | D | |
---|---|---|---|---|
count | 6.000000 | 6.000000 | 6.000000 | 6.000000 |
mean | 0.334672 | 0.481650 | -0.238597 | -0.346247 |
std | 1.519575 | 0.819667 | 0.540141 | 0.863000 |
min | -2.533224 | -0.349605 | -1.001553 | -1.588265 |
25% | 0.183128 | -0.058849 | -0.412259 | -0.895698 |
50% | 0.776962 | 0.289029 | -0.271093 | -0.214544 |
75% | 1.106588 | 0.790716 | -0.130613 | 0.173292 |
max | 1.785686 | 1.878310 | 0.655259 | 0.760050 |
type(df.describe())  # the summary is itself a DataFrame
pandas.core.frame.DataFrame
df  # display the original frame before the reshaping/sorting examples
A | B | C | D | |
---|---|---|---|---|
2018-03-13 | 0.049410 | 1.878310 | -0.147890 | -1.588265 |
2018-03-14 | 0.969644 | 0.951196 | -0.394295 | -0.012396 |
2018-03-15 | -2.533224 | 0.268784 | -0.418247 | 0.760050 |
2018-03-16 | 1.152236 | -0.168060 | 0.655259 | -0.416691 |
2018-03-17 | 0.584280 | -0.349605 | -0.124854 | 0.235189 |
2018-03-18 | 1.785686 | 0.309275 | -1.001553 | -1.055367 |
df.T # transpose: rows become columns and vice versa
2018-03-13 00:00:00 | 2018-03-14 00:00:00 | 2018-03-15 00:00:00 | 2018-03-16 00:00:00 | 2018-03-17 00:00:00 | 2018-03-18 00:00:00 | |
---|---|---|---|---|---|---|
A | 0.049410 | 0.969644 | -2.533224 | 1.152236 | 0.584280 | 1.785686 |
B | 1.878310 | 0.951196 | 0.268784 | -0.168060 | -0.349605 | 0.309275 |
C | -0.147890 | -0.394295 | -0.418247 | 0.655259 | -0.124854 | -1.001553 |
D | -1.588265 | -0.012396 | 0.760050 | -0.416691 | 0.235189 | -1.055367 |
df.sort_index(axis=1,ascending=False) # sort along an axis: here the column labels, in descending order
D | C | B | A | |
---|---|---|---|---|
2018-03-13 | -1.588265 | -0.147890 | 1.878310 | 0.049410 |
2018-03-14 | -0.012396 | -0.394295 | 0.951196 | 0.969644 |
2018-03-15 | 0.760050 | -0.418247 | 0.268784 | -2.533224 |
2018-03-16 | -0.416691 | 0.655259 | -0.168060 | 1.152236 |
2018-03-17 | 0.235189 | -0.124854 | -0.349605 | 0.584280 |
2018-03-18 | -1.055367 | -1.001553 | 0.309275 | 1.785686 |
df.sort_values(by='B')  # reorder rows by the values in column B, ascending
A | B | C | D | |
---|---|---|---|---|
2018-03-17 | 0.584280 | -0.349605 | -0.124854 | 0.235189 |
2018-03-16 | 1.152236 | -0.168060 | 0.655259 | -0.416691 |
2018-03-15 | -2.533224 | 0.268784 | -0.418247 | 0.760050 |
2018-03-18 | 1.785686 | 0.309275 | -1.001553 | -1.055367 |
2018-03-14 | 0.969644 | 0.951196 | -0.394295 | -0.012396 |
2018-03-13 | 0.049410 | 1.878310 | -0.147890 | -1.588265 |