from __future__ import division

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Introductory walk-through of NumPy vectors/matrices, random sampling,
# matplotlib plotting and pandas data frames, written as a notebook-style
# script.  Bare expressions such as ``x`` or ``Z.shape`` are there so that a
# notebook displays their value; they have no effect when run as a script.

# ``%matplotlib inline`` is an IPython magic, not Python syntax.  Issue it
# through the IPython API when running under IPython/Jupyter and skip it
# silently under a plain interpreter, where ``get_ipython`` is undefined.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

# --- Vectors ---------------------------------------------------------------

x = np.array([2, 7, 5])  # explicit vector creation
x

y = np.arange(4, 13, 3)  # vector creation from a sequence (start, stop, step)
y

x + y  # vectors can be added
x / y  # divided
x ** y  # exponentiated

x[1]  # vector elements can be selected by position
x[1:3]  # multiple elements can be selected using slices
x[-2]  # elements can be specified as offset from end
x[np.array([0, 1])]  # elements can be specified as an array

# --- Matrices --------------------------------------------------------------

# NOTE: np.matrix is kept so Z retains its original type, but NumPy no longer
# recommends it; a plain 2-D ndarray supports the same slicing shown here.
Z = np.matrix(np.arange(1, 13)).reshape((4, 3))
Z  # note: R arranges the elements column-wise

# R is 1-based and includes ending index, Python is 0 based and does not.
Z[2:4, 1:3]
Z[:, 1:3]  # column slice
Z.shape

# --- Random samples and basic plots ----------------------------------------

x = np.random.uniform(0.0, 1.0, 50)
x

y = np.random.normal(0.0, 1.0, 50)
y

fig, ax = plt.subplots()
ax.scatter(x, y)

fig, ax = plt.subplots()
ax.set_xlabel("Random Uniform")
ax.set_ylabel("Random Normal")
ax.scatter(x, y, marker='o', color='red')  # plot customizations

# Start a fresh figure so the side-by-side panels are not drawn on top of the
# red scatter plot above when this runs outside a notebook cell.
plt.figure()
plt.subplot(121)  # parameter indicates 1 rows, 2 col, first figure
plt.scatter(x, y)
plt.subplot(122)
plt.hist(y)

# --- Data frames -----------------------------------------------------------

# data comes from ISL book site: http://www-bcf.usc.edu/~gareth/ISL/data.html
auto_df = pd.read_csv("../data/Auto.csv")
auto_df.columns  # column names
auto_df.shape  # number of rows, number of columns
type(auto_df)
auto_df.describe()  # equivalent of R's DataFrame.summary()

# DataFrame.plot draws on a new figure and returns its Axes, so the y label
# has to be set on that Axes; calling plt.ylabel() beforehand would label the
# previous plot instead.
mpg_ax = auto_df.plot(x="cylinders", y="mpg", style='o')
mpg_ax.set_ylabel("MPG")

# MPG distribution by number of cylinders
auto_df.boxplot(column="mpg", by="cylinders")

# similar to R pairs, shows correlation scatter plots between columns and
# distribution for each column along the diagonal.
# The R version that uses formulas does not seem to have a Python equivalent
# (and doesn't seem to be very useful for exploratory analysis IMO).
# scatter_matrix lives in pd.plotting; the old pd.tools.plotting module was
# removed from pandas.
axes = pd.plotting.scatter_matrix(auto_df, color="brown")