from __future__ import division
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# --- Vectors: creation, element-wise arithmetic, and indexing ---

x = np.array([2, 7, 5])  # build a vector from an explicit list of values
x

y = np.arange(start=4, stop=13, step=3)  # build a vector from a range; stop is exclusive
y

x + y  # element-wise addition

x / y  # element-wise division

x ** y  # element-wise exponentiation

x[1]  # single element by (0-based) position

x[1:3]  # a slice picks several consecutive elements (end index excluded)

x[-2]  # negative positions count back from the end

x[np.array([0, 1])]  # an index array picks the elements at the listed positions

# --- Matrices: creation, slicing, and shape ---

# Lay the integers 1..12 out as a 4x3 matrix. NumPy fills row by row,
# whereas R fills its matrices column by column.
# NOTE(review): NumPy's documentation discourages np.matrix in favour of
# plain ndarrays; kept here so the type of Z does not change.
Z = np.matrix(np.arange(1, 13).reshape(4, 3))
Z

# Rows 2-3 and columns 1-2. Python indices are 0-based and the end of a slice
# is excluded; R indices are 1-based and the end is included.
Z[2:4, 1:3]

Z[:, 1:3]  # all rows, columns 1-2

Z.shape  # (number of rows, number of columns)

# --- Random samples used by the plots that follow ---

x = np.random.uniform(low=0.0, high=1.0, size=50)  # 50 draws from Uniform[0, 1)
x

y = np.random.normal(loc=0.0, scale=1.0, size=50)  # 50 draws from Normal(mean 0, sd 1)
y

# Plain scatter plot of x against y on a fresh figure.
fig, ax = plt.subplots()
plt.scatter(x, y)

# Same scatter on another fresh figure, customized with axis labels and an
# explicit marker shape and color. The labels are set through `ax`, which is
# the current axes right after plt.subplots().
fig, ax = plt.subplots()
ax.set_xlabel("Random Uniform")
ax.set_ylabel("Random Normal")
plt.scatter(x, y, color='red', marker='o')

# Two panels side by side on a grid of 1 row x 2 columns.
plt.subplot(1, 2, 1)  # select panel 1 (left): scatter of x against y
plt.scatter(x, y)
plt.subplot(1, 2, 2)  # select panel 2 (right): histogram of y
plt.hist(y)

# --- Load the Auto dataset and take a first look at it ---

# The CSV comes from the ISL book site: http://www-bcf.usc.edu/~gareth/ISL/data.html
auto_df = pd.read_csv(filepath_or_buffer="../data/Auto.csv")
auto_df.columns  # names of the columns

auto_df.shape  # (number of rows, number of columns)

type(auto_df)  # the object returned by read_csv

auto_df.describe()  # per-column summary statistics, comparable to R's summary() on a data frame

# MPG against number of cylinders, drawn as unconnected points.
# DataFrame.plot returns the Axes it drew on, so the y-label is applied to that
# Axes directly. Calling plt.ylabel() before the plot exists labels whichever
# axes happens to be current at that moment rather than this plot.
ax = auto_df.plot(x="cylinders", y="mpg", style='o')
ax.set_ylabel("MPG")

auto_df.boxplot(column="mpg", by="cylinders")  # MPG distribution by number of cylinders

# Similar to R's pairs(): a grid of pairwise scatter plots between columns,
# with the distribution of each column along the diagonal.
# The R version that uses formulas does not seem to have a Python equivalent
# (and doesn't seem to be very useful for exploratory analysis IMO).
#
# scatter_matrix lives in pandas.plotting on current pandas; the older
# pandas.tools.plotting location is only used as a fallback for installs
# that predate the move.
try:
    _scatter_matrix = pd.plotting.scatter_matrix
except AttributeError:
    _scatter_matrix = pd.tools.plotting.scatter_matrix
axes = _scatter_matrix(auto_df, color="brown")