from IPython.display import HTML
import requests
import pandas as pd
import MySQLdb
import pandas.io.sql as psql
import datetime
import time
import pytz
import time
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib
# Fetch the custom notebook stylesheet and wrap it in an IPython HTML object
# (it is rendered when this is the last expression of a notebook cell).
# The timeout keeps the cell from hanging forever on an unreachable host, and
# raise_for_status() stops an HTTP error page from being rendered as CSS.
styles = requests.get(
    "https://raw.github.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/master/styles/custom.css",
    timeout=10,
)
styles.raise_for_status()
HTML(styles.text)
import json
# Download the matplotlib rc settings (served as JSON) and apply them to the
# global rcParams. The request is bounded by a timeout and its HTTP status is
# checked first, so a failed download surfaces as a clear HTTP error rather
# than as a JSON decoding failure.
rc_response = requests.get(
    "https://raw.github.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/master/styles/bmh_matplotlibrc.json",
    timeout=10,
)
rc_response.raise_for_status()
s = rc_response.json()
matplotlib.rcParams.update(s)
This is a test
# Load the readings CSV. Column names are supplied explicitly, so pandas
# treats the first line of the file as data rather than as a header row.
# NOTE(review): the path is absolute and machine-specific — adjust as needed.
df = pd.read_csv(
    "/home/nipun/study/datasets/UMASS/homeA-all/homeA-phase/2012-Jul-1-p1.csv",
    names=["timestamp", "frequency", "voltage"],
)
# Show the loaded frame as the cell output.
df
<class 'pandas.core.frame.DataFrame'> Int64Index: 85386 entries, 0 to 85385 Data columns (total 3 columns): timestamp 85386 non-null values frequency 85386 non-null values voltage 85386 non-null values dtypes: float64(2), int64(1)
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
df.describe()
| | timestamp | frequency | voltage |
---|---|---|---|
count | 8.538600e+04 | 85386.000000 | 85386.000000 |
mean | 1.341091e+09 | 59.993085 | 120.571565 |
std | 2.490415e+04 | 0.018714 | 1.057214 |
min | 1.341048e+09 | 59.930000 | 116.851000 |
25% | 1.341069e+09 | 59.978000 | 119.959000 |
50% | 1.341091e+09 | 59.988000 | 120.644000 |
75% | 1.341113e+09 | 60.008000 | 121.242000 |
max | 1.341134e+09 | 60.072000 | 150.831000 |
# The timestamp column holds Unix epoch seconds; convert it directly with
# unit="s" instead of scaling the values to float nanoseconds first.
df.index = pd.to_datetime(df.timestamp, unit="s")
# Preview the frame without the now-redundant column (df itself is unchanged
# here because the result is not assigned). axis is passed by keyword:
# pandas 2.0 made every drop() argument except `labels` keyword-only, so the
# positional form drop('timestamp', 1) raises TypeError there.
df.drop("timestamp", axis=1)
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 85386 entries, 2012-06-30 09:17:42 to 2012-07-01 09:15:31 Data columns (total 2 columns): frequency 85386 non-null values voltage 85386 non-null values dtypes: float64(2)
# Remove the raw epoch column for good; the DatetimeIndex already carries the
# time information. axis is given by keyword because pandas 2.0 no longer
# accepts it positionally.
df = df.drop("timestamp", axis=1)
# Use a larger default font for the figures that follow.
matplotlib.rcParams.update({"font.size": 20})
# Box plot of the frequency readings.
plt.boxplot(df.frequency)
{'boxes': [<matplotlib.lines.Line2D at 0x4d56690>], 'caps': [<matplotlib.lines.Line2D at 0x4a0e9d0>, <matplotlib.lines.Line2D at 0x4d56050>], 'fliers': [<matplotlib.lines.Line2D at 0x4d5e350>, <matplotlib.lines.Line2D at 0x4d5e950>], 'medians': [<matplotlib.lines.Line2D at 0x4d56cd0>], 'whiskers': [<matplotlib.lines.Line2D at 0x4a0e090>, <matplotlib.lines.Line2D at 0x4a0e310>]}
# Voltage box plot, written to us_voltage.png.
v = df.voltage.values
# Keep only readings below 150 (the describe() output shows a max of ~150.8,
# far above the 75th percentile).
v = v[v < 150]

plt.boxplot(v)

# Axis decoration: grid, a single blank tick label under the box, y label
# and a two-line title.
plt.grid()
plt.xticks([1], [''])
plt.ylabel('Voltage (V)')
plt.title('Voltage Boxplot (US)\n SMART* Dataset')

# Resize the current figure to 7x9 inches before saving it.
fig = plt.gcf()
fig.set_size_inches(7, 9)
plt.savefig('us_voltage.png', dpi=100, bbox_inches='tight')

# Show the filtered array as the cell output.
v
array([ 121.08 , 121.044, 121.044, ..., 120.187, 120.186, 120.208])
# Draw the box plot of the filtered voltage array again.
plt.boxplot(v)
{'boxes': [<matplotlib.lines.Line2D at 0x53f0990>], 'caps': [<matplotlib.lines.Line2D at 0x3de09d0>, <matplotlib.lines.Line2D at 0x3df42d0>], 'fliers': [<matplotlib.lines.Line2D at 0x53f5a10>, <matplotlib.lines.Line2D at 0x54062d0>], 'medians': [<matplotlib.lines.Line2D at 0x5402d90>], 'whiskers': [<matplotlib.lines.Line2D at 0x3de0d10>, <matplotlib.lines.Line2D at 0x3de0c50>]}
# Display the pending figure.
plt.show()
# Line plot of the filtered voltage values against their sample index.
plt.plot(v)
[<matplotlib.lines.Line2D at 0x47ecc10>]
# Display the line plot.
plt.show()