import pandas as pd
import numpy as np
from dateutil.parser import parse
import datetime
from pandas_datareader import data, wb
df = pd.read_csv('beatles.csv')
df.dtypes
Unnamed: 0 int64 Title object Released object Label object UK Chart Position float64 US Chart Position float64 BPI Certification object RIAA Certification object Count Songs int64 Year int64 dtype: object
df = pd.read_csv('beatles.csv', converters={'Released': parse})
df.dtypes
Unnamed: 0 int64 Title object Released datetime64[ns] Label object UK Chart Position float64 US Chart Position float64 BPI Certification object RIAA Certification object Count Songs int64 Year int64 dtype: object
dates = ['10-10-2015', '9/8/2012', '2/2014', 'Sep 17, 2000', 'foo']
ser = pd.Series(pd.to_datetime(dates, errors='coerce'))
ser
0 2015-10-10 1 2012-09-08 2 2014-02-01 3 2000-09-17 4 NaT dtype: datetime64[ns]
ser.dt
<pandas.tseries.common.DatetimeProperties object at 0x10f3e49e8>
ser.dt.year
0 2015.0 1 2012.0 2 2014.0 3 2000.0 4 NaN dtype: float64
ser.dt.month
0 10.0 1 9.0 2 2.0 3 9.0 4 NaN dtype: float64
ser.dt.hour
0 0.0 1 0.0 2 0.0 3 0.0 4 NaN dtype: float64
idx = pd.date_range('1/1/2000', periods=500, freq='d')
idx
DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08', '2000-01-09', '2000-01-10', ... '2001-05-05', '2001-05-06', '2001-05-07', '2001-05-08', '2001-05-09', '2001-05-10', '2001-05-11', '2001-05-12', '2001-05-13', '2001-05-14'], dtype='datetime64[ns]', length=500, freq='D')
ser = pd.Series(np.random.randn(len(idx)), index=idx)
ser
2000-01-01 -0.306307 2000-01-02 -0.435708 2000-01-03 -1.535846 2000-01-04 0.409924 2000-01-05 -0.666694 2000-01-06 -0.480828 2000-01-07 0.379973 2000-01-08 -0.773511 2000-01-09 0.396353 2000-01-10 -0.294093 2000-01-11 0.442196 2000-01-12 -1.519349 2000-01-13 -0.649513 2000-01-14 -2.263706 2000-01-15 -0.340966 2000-01-16 0.435463 2000-01-17 -1.666545 2000-01-18 1.452651 2000-01-19 -0.037077 2000-01-20 0.538926 2000-01-21 -0.042326 2000-01-22 -0.227057 2000-01-23 1.024661 2000-01-24 -0.687116 2000-01-25 1.405593 2000-01-26 -0.144300 2000-01-27 0.630102 2000-01-28 0.064640 2000-01-29 -0.376007 2000-01-30 0.017697 ... 2001-04-15 -1.672253 2001-04-16 -1.820955 2001-04-17 0.589024 2001-04-18 -0.212713 2001-04-19 2.062938 2001-04-20 -0.318086 2001-04-21 -0.284368 2001-04-22 0.135038 2001-04-23 -0.435896 2001-04-24 1.432767 2001-04-25 -0.775905 2001-04-26 0.086192 2001-04-27 1.469889 2001-04-28 -0.358339 2001-04-29 1.509992 2001-04-30 -0.467628 2001-05-01 -0.810806 2001-05-02 0.352891 2001-05-03 -0.994101 2001-05-04 0.586770 2001-05-05 -0.028301 2001-05-06 -0.308731 2001-05-07 -2.092958 2001-05-08 1.126250 2001-05-09 -0.172683 2001-05-10 -0.826971 2001-05-11 0.324843 2001-05-12 1.402411 2001-05-13 0.371532 2001-05-14 -1.213970 Freq: D, dtype: float64
ser2 = ser + ser.shift(2)
ser2
2000-01-01 NaN 2000-01-02 NaN 2000-01-03 -1.842153 2000-01-04 -0.025784 2000-01-05 -2.202540 2000-01-06 -0.070904 2000-01-07 -0.286721 2000-01-08 -1.254338 2000-01-09 0.776326 2000-01-10 -1.067604 2000-01-11 0.838549 2000-01-12 -1.813442 2000-01-13 -0.207317 2000-01-14 -3.783055 2000-01-15 -0.990478 2000-01-16 -1.828243 2000-01-17 -2.007510 2000-01-18 1.888114 2000-01-19 -1.703621 2000-01-20 1.991577 2000-01-21 -0.079402 2000-01-22 0.311869 2000-01-23 0.982335 2000-01-24 -0.914173 2000-01-25 2.430253 2000-01-26 -0.831416 2000-01-27 2.035695 2000-01-28 -0.079660 2000-01-29 0.254096 2000-01-30 0.082337 ... 2001-04-15 -2.869890 2001-04-16 -2.238086 2001-04-17 -1.083230 2001-04-18 -2.033668 2001-04-19 2.651962 2001-04-20 -0.530799 2001-04-21 1.778571 2001-04-22 -0.183047 2001-04-23 -0.720264 2001-04-24 1.567806 2001-04-25 -1.211801 2001-04-26 1.518959 2001-04-27 0.693983 2001-04-28 -0.272147 2001-04-29 2.979880 2001-04-30 -0.825967 2001-05-01 0.699186 2001-05-02 -0.114737 2001-05-03 -1.804907 2001-05-04 0.939660 2001-05-05 -1.022402 2001-05-06 0.278039 2001-05-07 -2.121259 2001-05-08 0.817519 2001-05-09 -2.265641 2001-05-10 0.299279 2001-05-11 0.152159 2001-05-12 0.575440 2001-05-13 0.696375 2001-05-14 0.188441 Freq: D, dtype: float64
ser.resample('Q').sum()
2000-03-31 3.521232 2000-06-30 0.306693 2000-09-30 -6.283600 2000-12-31 -0.992922 2001-03-31 16.540240 2001-06-30 -3.598359 Freq: Q-DEC, dtype: float64
ser.resample('Q', label='left').sum()
1999-12-31 3.521232 2000-03-31 0.306693 2000-06-30 -6.283600 2000-09-30 -0.992922 2000-12-31 16.540240 2001-03-31 -3.598359 Freq: Q-DEC, dtype: float64
ser.resample('m').mean()
2000-01-31 -0.189758 2000-02-29 0.078981 2000-03-31 0.229460 2000-04-30 0.042248 2000-05-31 0.103755 2000-06-30 -0.139238 2000-07-31 -0.280710 2000-08-31 0.077720 2000-09-30 0.000302 2000-10-31 0.169006 2000-11-30 -0.021901 2000-12-31 -0.179841 2001-01-31 -0.010485 2001-02-28 0.281741 2001-03-31 0.289565 2001-04-30 -0.043818 2001-05-31 -0.163130 Freq: M, dtype: float64
start = datetime.datetime(2010, 1, 1)
end = datetime.datetime(2016, 12, 28)
yahoo_df = data.DataReader('F', 'yahoo', start, end)
yahoo_df.tail().T
Date | 2016-12-21 00:00:00 | 2016-12-22 00:00:00 | 2016-12-23 00:00:00 | 2016-12-27 00:00:00 | 2016-12-28 00:00:00 |
---|---|---|---|---|---|
Open | 12.73 | 12.63 | 12.43 | 12.43 | 12.37 |
High | 12.77 | 12.64 | 12.46 | 12.51 | 12.45 |
Low | 12.64 | 12.40 | 12.36 | 12.36 | 12.22 |
Close | 12.64 | 12.40 | 12.46 | 12.39 | 12.25 |
Volume | 18056600.00 | 27556900.00 | 15578200.00 | 19403000.00 | 26678100.00 |
Adj Close | 12.64 | 12.40 | 12.46 | 12.39 | 12.25 |
yahoo_df['mean50'] = yahoo_df['Close'].rolling(window=50, center=False).mean()
yahoo_df
Open | High | Low | Close | Volume | Adj Close | mean50 | |
---|---|---|---|---|---|---|---|
Date | |||||||
2010-01-04 | 10.17 | 10.28 | 10.05 | 10.28 | 60855800 | 8.554412 | NaN |
2010-01-05 | 10.45 | 11.24 | 10.40 | 10.96 | 215620200 | 9.120268 | NaN |
2010-01-06 | 11.21 | 11.46 | 11.13 | 11.37 | 200070600 | 9.461446 | NaN |
2010-01-07 | 11.46 | 11.69 | 11.32 | 11.66 | 130201700 | 9.702767 | NaN |
2010-01-08 | 11.67 | 11.74 | 11.46 | 11.69 | 130463000 | 9.727731 | NaN |
2010-01-11 | 11.90 | 12.14 | 11.78 | 12.11 | 170626200 | 10.077230 | NaN |
2010-01-12 | 11.98 | 12.03 | 11.72 | 11.87 | 162995900 | 9.877516 | NaN |
2010-01-13 | 11.91 | 11.93 | 11.47 | 11.68 | 154527100 | 9.719410 | NaN |
2010-01-14 | 11.65 | 11.86 | 11.51 | 11.76 | 116531200 | 9.785981 | NaN |
2010-01-15 | 11.74 | 11.76 | 11.55 | 11.60 | 96149800 | 9.652838 | NaN |
2010-01-19 | 11.51 | 11.83 | 11.46 | 11.75 | 65934000 | 9.777659 | NaN |
2010-01-20 | 11.68 | 11.69 | 11.50 | 11.51 | 71649500 | 9.577946 | NaN |
2010-01-21 | 11.53 | 11.62 | 11.01 | 11.18 | 121451400 | 9.303339 | NaN |
2010-01-22 | 11.01 | 11.12 | 10.41 | 10.52 | 161530100 | 8.754126 | NaN |
2010-01-25 | 10.73 | 11.10 | 10.61 | 11.03 | 121621500 | 9.178517 | NaN |
2010-01-26 | 11.17 | 11.46 | 11.07 | 11.19 | 108250500 | 9.311660 | NaN |
2010-01-27 | 11.57 | 11.62 | 11.22 | 11.55 | 105091600 | 9.611231 | NaN |
2010-01-28 | 11.90 | 11.95 | 11.27 | 11.41 | 203320000 | 9.494731 | NaN |
2010-01-29 | 11.60 | 11.61 | 10.70 | 10.84 | 159741200 | 9.020411 | NaN |
2010-02-01 | 11.14 | 11.18 | 10.93 | 11.12 | 82748200 | 9.253410 | NaN |
2010-02-02 | 11.26 | 11.52 | 11.19 | 11.39 | 119714900 | 9.478089 | NaN |
2010-02-03 | 11.49 | 11.66 | 11.42 | 11.64 | 90125500 | 9.686124 | NaN |
2010-02-04 | 11.49 | 11.53 | 11.00 | 11.06 | 129792200 | 9.203482 | NaN |
2010-02-05 | 10.97 | 11.11 | 10.49 | 10.91 | 181535200 | 9.078661 | NaN |
2010-02-08 | 11.09 | 11.32 | 10.88 | 10.97 | 92031400 | 9.128589 | NaN |
2010-02-09 | 11.18 | 11.22 | 11.02 | 11.15 | 83207100 | 9.278374 | NaN |
2010-02-10 | 11.12 | 11.14 | 10.90 | 10.94 | 73395600 | 9.103625 | NaN |
2010-02-11 | 11.00 | 11.19 | 10.88 | 11.18 | 65116200 | 9.303339 | NaN |
2010-02-12 | 10.92 | 11.18 | 10.85 | 11.12 | 69465400 | 9.253410 | NaN |
2010-02-16 | 11.21 | 11.38 | 11.11 | 11.32 | 62537500 | 9.419838 | NaN |
... | ... | ... | ... | ... | ... | ... | ... |
2016-11-15 | 12.06 | 12.14 | 11.95 | 12.04 | 31498500 | 12.040000 | 12.0164 |
2016-11-16 | 12.00 | 12.05 | 11.92 | 12.00 | 25195500 | 12.000000 | 12.0024 |
2016-11-17 | 11.90 | 11.96 | 11.78 | 11.87 | 38502500 | 11.870000 | 11.9852 |
2016-11-18 | 11.87 | 11.90 | 11.73 | 11.76 | 30662000 | 11.760000 | 11.9728 |
2016-11-21 | 11.77 | 11.90 | 11.77 | 11.79 | 19508600 | 11.790000 | 11.9546 |
2016-11-22 | 11.76 | 11.91 | 11.73 | 11.89 | 24274700 | 11.890000 | 11.9448 |
2016-11-23 | 11.90 | 12.00 | 11.85 | 11.90 | 22414000 | 11.900000 | 11.9400 |
2016-11-25 | 11.95 | 12.06 | 11.93 | 12.04 | 11439600 | 12.040000 | 11.9386 |
2016-11-28 | 12.02 | 12.05 | 11.89 | 11.92 | 24407200 | 11.920000 | 11.9348 |
2016-11-29 | 11.93 | 12.00 | 11.87 | 11.92 | 23690800 | 11.920000 | 11.9310 |
2016-11-30 | 11.97 | 12.14 | 11.92 | 11.96 | 47646200 | 11.960000 | 11.9302 |
2016-12-01 | 12.23 | 12.80 | 12.18 | 12.43 | 95143600 | 12.430000 | 11.9370 |
2016-12-02 | 12.58 | 12.58 | 12.15 | 12.24 | 38601600 | 12.240000 | 11.9382 |
2016-12-05 | 12.31 | 12.50 | 12.29 | 12.44 | 36118300 | 12.440000 | 11.9436 |
2016-12-06 | 12.40 | 12.59 | 12.37 | 12.56 | 23668100 | 12.560000 | 11.9546 |
2016-12-07 | 12.59 | 13.15 | 12.54 | 13.06 | 58440500 | 13.060000 | 11.9762 |
2016-12-08 | 13.07 | 13.15 | 12.97 | 13.03 | 41451800 | 13.030000 | 11.9950 |
2016-12-09 | 13.00 | 13.20 | 12.91 | 13.17 | 45233200 | 13.170000 | 12.0190 |
2016-12-12 | 13.08 | 13.10 | 12.81 | 12.82 | 39544700 | 12.820000 | 12.0340 |
2016-12-13 | 12.84 | 12.88 | 12.75 | 12.77 | 38816300 | 12.770000 | 12.0474 |
2016-12-14 | 12.65 | 12.79 | 12.50 | 12.53 | 42243500 | 12.530000 | 12.0540 |
2016-12-15 | 12.54 | 12.72 | 12.52 | 12.58 | 23672100 | 12.580000 | 12.0560 |
2016-12-16 | 12.66 | 12.70 | 12.56 | 12.63 | 28099200 | 12.630000 | 12.0608 |
2016-12-19 | 12.59 | 12.80 | 12.59 | 12.66 | 22780900 | 12.660000 | 12.0682 |
2016-12-20 | 12.70 | 12.82 | 12.68 | 12.78 | 17752900 | 12.780000 | 12.0814 |
2016-12-21 | 12.73 | 12.77 | 12.64 | 12.64 | 18056600 | 12.640000 | 12.0944 |
2016-12-22 | 12.63 | 12.64 | 12.40 | 12.40 | 27556900 | 12.400000 | 12.1032 |
2016-12-23 | 12.43 | 12.46 | 12.36 | 12.46 | 15578200 | 12.460000 | 12.1142 |
2016-12-27 | 12.43 | 12.51 | 12.36 | 12.39 | 19403000 | 12.390000 | 12.1238 |
2016-12-28 | 12.37 | 12.45 | 12.22 | 12.25 | 26678100 | 12.250000 | 12.1312 |
1760 rows × 7 columns
yahoo_df['mean200'] = yahoo_df['Close'].rolling(window=200, center=False).mean()
yahoo_df
Open | High | Low | Close | Volume | Adj Close | mean50 | mean200 | |
---|---|---|---|---|---|---|---|---|
Date | ||||||||
2010-01-04 | 10.17 | 10.28 | 10.05 | 10.28 | 60855800 | 8.554412 | NaN | NaN |
2010-01-05 | 10.45 | 11.24 | 10.40 | 10.96 | 215620200 | 9.120268 | NaN | NaN |
2010-01-06 | 11.21 | 11.46 | 11.13 | 11.37 | 200070600 | 9.461446 | NaN | NaN |
2010-01-07 | 11.46 | 11.69 | 11.32 | 11.66 | 130201700 | 9.702767 | NaN | NaN |
2010-01-08 | 11.67 | 11.74 | 11.46 | 11.69 | 130463000 | 9.727731 | NaN | NaN |
2010-01-11 | 11.90 | 12.14 | 11.78 | 12.11 | 170626200 | 10.077230 | NaN | NaN |
2010-01-12 | 11.98 | 12.03 | 11.72 | 11.87 | 162995900 | 9.877516 | NaN | NaN |
2010-01-13 | 11.91 | 11.93 | 11.47 | 11.68 | 154527100 | 9.719410 | NaN | NaN |
2010-01-14 | 11.65 | 11.86 | 11.51 | 11.76 | 116531200 | 9.785981 | NaN | NaN |
2010-01-15 | 11.74 | 11.76 | 11.55 | 11.60 | 96149800 | 9.652838 | NaN | NaN |
2010-01-19 | 11.51 | 11.83 | 11.46 | 11.75 | 65934000 | 9.777659 | NaN | NaN |
2010-01-20 | 11.68 | 11.69 | 11.50 | 11.51 | 71649500 | 9.577946 | NaN | NaN |
2010-01-21 | 11.53 | 11.62 | 11.01 | 11.18 | 121451400 | 9.303339 | NaN | NaN |
2010-01-22 | 11.01 | 11.12 | 10.41 | 10.52 | 161530100 | 8.754126 | NaN | NaN |
2010-01-25 | 10.73 | 11.10 | 10.61 | 11.03 | 121621500 | 9.178517 | NaN | NaN |
2010-01-26 | 11.17 | 11.46 | 11.07 | 11.19 | 108250500 | 9.311660 | NaN | NaN |
2010-01-27 | 11.57 | 11.62 | 11.22 | 11.55 | 105091600 | 9.611231 | NaN | NaN |
2010-01-28 | 11.90 | 11.95 | 11.27 | 11.41 | 203320000 | 9.494731 | NaN | NaN |
2010-01-29 | 11.60 | 11.61 | 10.70 | 10.84 | 159741200 | 9.020411 | NaN | NaN |
2010-02-01 | 11.14 | 11.18 | 10.93 | 11.12 | 82748200 | 9.253410 | NaN | NaN |
2010-02-02 | 11.26 | 11.52 | 11.19 | 11.39 | 119714900 | 9.478089 | NaN | NaN |
2010-02-03 | 11.49 | 11.66 | 11.42 | 11.64 | 90125500 | 9.686124 | NaN | NaN |
2010-02-04 | 11.49 | 11.53 | 11.00 | 11.06 | 129792200 | 9.203482 | NaN | NaN |
2010-02-05 | 10.97 | 11.11 | 10.49 | 10.91 | 181535200 | 9.078661 | NaN | NaN |
2010-02-08 | 11.09 | 11.32 | 10.88 | 10.97 | 92031400 | 9.128589 | NaN | NaN |
2010-02-09 | 11.18 | 11.22 | 11.02 | 11.15 | 83207100 | 9.278374 | NaN | NaN |
2010-02-10 | 11.12 | 11.14 | 10.90 | 10.94 | 73395600 | 9.103625 | NaN | NaN |
2010-02-11 | 11.00 | 11.19 | 10.88 | 11.18 | 65116200 | 9.303339 | NaN | NaN |
2010-02-12 | 10.92 | 11.18 | 10.85 | 11.12 | 69465400 | 9.253410 | NaN | NaN |
2010-02-16 | 11.21 | 11.38 | 11.11 | 11.32 | 62537500 | 9.419838 | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... |
2016-11-15 | 12.06 | 12.14 | 11.95 | 12.04 | 31498500 | 12.040000 | 12.0164 | 12.70215 |
2016-11-16 | 12.00 | 12.05 | 11.92 | 12.00 | 25195500 | 12.000000 | 12.0024 | 12.70485 |
2016-11-17 | 11.90 | 11.96 | 11.78 | 11.87 | 38502500 | 11.870000 | 11.9852 | 12.70655 |
2016-11-18 | 11.87 | 11.90 | 11.73 | 11.76 | 30662000 | 11.760000 | 11.9728 | 12.70810 |
2016-11-21 | 11.77 | 11.90 | 11.77 | 11.79 | 19508600 | 11.790000 | 11.9546 | 12.70910 |
2016-11-22 | 11.76 | 11.91 | 11.73 | 11.89 | 24274700 | 11.890000 | 11.9448 | 12.71180 |
2016-11-23 | 11.90 | 12.00 | 11.85 | 11.90 | 22414000 | 11.900000 | 11.9400 | 12.71460 |
2016-11-25 | 11.95 | 12.06 | 11.93 | 12.04 | 11439600 | 12.040000 | 11.9386 | 12.71895 |
2016-11-28 | 12.02 | 12.05 | 11.89 | 11.92 | 24407200 | 11.920000 | 11.9348 | 12.72080 |
2016-11-29 | 11.93 | 12.00 | 11.87 | 11.92 | 23690800 | 11.920000 | 11.9310 | 12.72105 |
2016-11-30 | 11.97 | 12.14 | 11.92 | 11.96 | 47646200 | 11.960000 | 11.9302 | 12.71950 |
2016-12-01 | 12.23 | 12.80 | 12.18 | 12.43 | 95143600 | 12.430000 | 11.9370 | 12.72035 |
2016-12-02 | 12.58 | 12.58 | 12.15 | 12.24 | 38601600 | 12.240000 | 11.9382 | 12.72105 |
2016-12-05 | 12.31 | 12.50 | 12.29 | 12.44 | 36118300 | 12.440000 | 11.9436 | 12.72045 |
2016-12-06 | 12.40 | 12.59 | 12.37 | 12.56 | 23668100 | 12.560000 | 11.9546 | 12.72115 |
2016-12-07 | 12.59 | 13.15 | 12.54 | 13.06 | 58440500 | 13.060000 | 11.9762 | 12.72605 |
2016-12-08 | 13.07 | 13.15 | 12.97 | 13.03 | 41451800 | 13.030000 | 11.9950 | 12.72925 |
2016-12-09 | 13.00 | 13.20 | 12.91 | 13.17 | 45233200 | 13.170000 | 12.0190 | 12.73275 |
2016-12-12 | 13.08 | 13.10 | 12.81 | 12.82 | 39544700 | 12.820000 | 12.0340 | 12.73430 |
2016-12-13 | 12.84 | 12.88 | 12.75 | 12.77 | 38816300 | 12.770000 | 12.0474 | 12.73270 |
2016-12-14 | 12.65 | 12.79 | 12.50 | 12.53 | 42243500 | 12.530000 | 12.0540 | 12.72925 |
2016-12-15 | 12.54 | 12.72 | 12.52 | 12.58 | 23672100 | 12.580000 | 12.0560 | 12.72445 |
2016-12-16 | 12.66 | 12.70 | 12.56 | 12.63 | 28099200 | 12.630000 | 12.0608 | 12.71965 |
2016-12-19 | 12.59 | 12.80 | 12.59 | 12.66 | 22780900 | 12.660000 | 12.0682 | 12.71485 |
2016-12-20 | 12.70 | 12.82 | 12.68 | 12.78 | 17752900 | 12.780000 | 12.0814 | 12.71260 |
2016-12-21 | 12.73 | 12.77 | 12.64 | 12.64 | 18056600 | 12.640000 | 12.0944 | 12.70985 |
2016-12-22 | 12.63 | 12.64 | 12.40 | 12.40 | 27556900 | 12.400000 | 12.1032 | 12.70610 |
2016-12-23 | 12.43 | 12.46 | 12.36 | 12.46 | 15578200 | 12.460000 | 12.1142 | 12.70195 |
2016-12-27 | 12.43 | 12.51 | 12.36 | 12.39 | 19403000 | 12.390000 | 12.1238 | 12.69755 |
2016-12-28 | 12.37 | 12.45 | 12.22 | 12.25 | 26678100 | 12.250000 | 12.1312 | 12.69265 |
1760 rows × 8 columns
%matplotlib inline
yahoo_df.plot()
<matplotlib.axes._subplots.AxesSubplot at 0x10ffab780>
yahoo_df[['Close', 'mean50', 'mean200']].plot()
<matplotlib.axes._subplots.AxesSubplot at 0x11013e9e8>