%matplotlib inline import pandas as pd import matplotlib.pyplot as plt import numpy as np raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'], 'last_name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'], 'female': [0, 1, 1, 0, 1], 'age': [42, 52, 36, 24, 73], 'preTestScore': [4, 24, 31, 2, 3], 'postTestScore': [25, 94, 57, 62, 70]} df = pd.DataFrame(raw_data, columns = ['first_name', 'last_name', 'age', 'female', 'preTestScore', 'postTestScore']) df plt.scatter(df.preTestScore, df.postTestScore , s=df.age) plt.scatter(df.preTestScore, df.postTestScore, s=300, c='m')