import pandas as pd import numpy as np import matplotlib.pyplot as plt # Make the graphs a bit prettier, and bigger pd.set_option('display.mpl_style', 'default') pd.set_option('display.line_width', 5000) pd.set_option('display.max_columns', 60) ## Read sample data set and convert string dates to datetimes bydatetime_df = pd.read_csv('data/bydate_shortstay_csv.csv',parse_dates=['datetime']) bydatetime_df.head() meanocc_df = bydatetime_df.groupby(['category','dayofweek'])['occupancy'].mean() meanocc_df meanoccdate = bydatetime_df.groupby(['category',bydatetime_df['datetime'].map(lambda x: x.date())])['occupancy'].mean() meanoccdate.head() meanoccdate.index[0] meanoccdate.index[0][1] ts = pd.Timestamp('6/26/2014') ts.strftime('%a') meanoccdate_df = pd.DataFrame(meanoccdate) meanoccdate_df['dayofweek'] = meanoccdate_df.index.map(lambda x: pd.Timestamp(x[1]).strftime('%a')) meanoccdate_df.head() meanoccdate_df.tail() meanoccdate_df['occupancy']['Total'] # Load the extension %load_ext rmagic %matplotlib inline # Create some data in Python and scatter it import pylab X = np.array([0,1,2,3,4]) Y = np.array([3,5,4,6,7]) pylab.scatter(X, Y) # "Push" these two numpy arrays into the R "space" %Rpush X Y %%R linmodel <- lm(Y~X) print(summary(linmodel)) # normed=1 plots probs instead of counts, alpha in [0,1] is transparency level (RGBA colors) plt.hist(meanoccdate_df['occupancy']['Total'], 20, normed=1, facecolor='green', alpha=0.75) plt.xlabel('Occupancy') plt.ylabel('Probability') plt.title(r'Histogram of Short Stay Occupancy') plt.grid(True) plt.show() %Rpush meanoccdate_df %R str(meanoccdate_df) meanoccdate_df['patient_type'] = meanoccdate_df.index.map(lambda x: x[0]) meanoccdate_df['date'] = meanoccdate_df.index.map(lambda x: x[1]) %Rpush meanoccdate_df %R str(meanoccdate_df) %R library(ggplot2) %%R g <- ggplot(data=meanoccdate_df[meanoccdate_df$patient_type == 'Total',]) + geom_histogram(aes(x=occupancy, y=..density..), fill="#FF9999", colour="black") print(g) %%R g2 <- ggplot(data=meanoccdate_df) + geom_histogram(aes(x=occupancy, y=..density..), binwidth=2, fill="#FF9999", colour="black") print(g2 + facet_grid(patient_type ~ dayofweek)) %R str(meanoccdate_df) %%R # Create vector with DOWs ordered as you wish DOW_order <- c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat") # Change DOW from factor to ordered factor using vector you just made meanoccdate_df$dayofweek <- factor(meanoccdate_df$dayofweek,levels=DOW_order,ordered=TRUE) %R str(meanoccdate_df) %%R g2 <- ggplot(data=meanoccdate_df) + geom_histogram(aes(x=occupancy, y=..density..), binwidth=2, fill="#FF9999", colour="black") print(g2 + facet_grid(patient_type ~ dayofweek))