import numpy as np
cd D:\Documents\Class\CSC478\Data
D:\Documents\Class\CSC478\Data
# Read the whole CSV as one 2-D array; dtype=None lets genfromtxt choose the
# element type from the file contents (the header row comes in as row 0).
vstable = np.genfromtxt("Video_Store.csv", dtype=None, delimiter=",")
print(vstable)
[['ID' 'Gender' 'Income' 'Age' 'Rentals' 'AvgPerVisit' 'Genre'] ['1' 'M' '45000 ' '25' '27' '2.5' 'Action'] ['2' 'F' '54000 ' '33' '12' '3.4' 'Drama'] ['3' 'F' '32000 ' '20' '42' '1.6' 'Comedy'] ['4' 'F' '59000 ' '70' '16' '4.2' 'Drama'] ['5' 'M' '37000 ' '35' '25' '3.2' 'Action'] ['6' 'M' '18000 ' '20' '33' '1.7' 'Action'] ['7' 'F' '29000 ' '45' '19' '3.8' 'Drama'] ['8' 'M' '74000 ' '25' '31' '2.4' 'Action'] ['9' 'M' '38000 ' '21' '18' '2.1' 'Comedy'] ['10' 'F' '65000 ' '40' '21' '3.3' 'Drama'] ['11' 'F' '41000 ' '22' '48' '2.3' 'Drama'] ['12' 'F' '26000 ' '22' '29' '2.9' 'Action'] ['13' 'M' '83000 ' '46' '14' '3.6' 'Comedy'] ['14' 'M' '45000 ' '36' '24' '2.7' 'Drama'] ['15' 'M' '68000 ' '30' '36' '2.7' 'Comedy'] ['16' 'M' '17000 ' '19' '26' '2.2' 'Action'] ['17' 'M' '36000 ' '35' '28' '3.5' 'Drama'] ['18' 'F' '6000 ' '16' '39' '1.8' 'Action'] ['19' 'F' '24000 ' '25' '41' '3.1' 'Comedy'] ['20' 'M' '12000 ' '16' '23' '2.2' 'Action'] ['21' 'F' '47000 ' '52' '11' '3.1' 'Drama'] ['22' 'M' '25000 ' '33' '16' '2.9' 'Drama'] ['23' 'F' '2000 ' '15' '30' '2.5' 'Comedy'] ['24' 'F' '79000 ' '35' '22' '3.8' 'Drama'] ['25' 'M' '1000 ' '16' '25' '1.4' 'Comedy'] ['26' 'F' '56000 ' '35' '40' '2.6' 'Action'] ['27' 'F' '62000 ' '47' '32' '3.6' 'Drama'] ['28' 'M' '57000 ' '52' '22' '4.1' 'Comedy'] ['29' 'F' '15000 ' '18' '37' '2.1' 'Action'] ['30' 'M' '41000 ' '25' '17' '1.4' 'Action'] ['31' 'F' '49000 ' '56' '15' '3.2' 'Comedy'] ['32' 'M' '47000 ' '30' '21' '3.1' 'Drama'] ['33' 'M' '23000 ' '25' '28' '2.7' 'Action'] ['34' 'F' '29000 ' '32' '19' '2.9' 'Action'] ['35' 'M' '74000 ' '29' '43' '4.6' 'Action'] ['36' 'F' '29000 ' '21' '34' '2.3' 'Comedy'] ['37' 'M' '89000 ' '46' '12' '1.2' 'Comedy'] ['38' 'M' '41000 ' '38' '20' '3.3' 'Drama'] ['39' 'F' '68000 ' '35' '19' '3.9' 'Comedy'] ['40' 'M' '17000 ' '19' '32' '1.8' 'Action']]
# Row 0 of the raw table holds the column headers.
labels = vstable[0, :]
print(labels)
['ID' 'Gender' 'Income' 'Age' 'Rentals' 'AvgPerVisit' 'Genre']
# Discard the header row so only data records remain, then preview five rows.
vstable = vstable[1:, :]
vstable[:5]
array([['1', 'M', '45000 ', '25', '27', '2.5', 'Action'], ['2', 'F', '54000 ', '33', '12', '3.4', 'Drama'], ['3', 'F', '32000 ', '20', '42', '1.6', 'Comedy'], ['4', 'F', '59000 ', '70', '16', '4.2', 'Drama'], ['5', 'M', '37000 ', '35', '25', '3.2', 'Action']], dtype='|S11')
# Column 3 of the string table, converted to integers.
age = vstable[:, 3].astype(int)
print(age)
[25 33 20 70 35 20 45 25 21 40 22 22 46 36 30 19 35 16 25 16 52 33 15 35 16 35 47 52 18 25 56 30 25 32 29 21 46 38 35 19]
# Column 2 of the string table, converted to floats.
sal = vstable[:, 2].astype(float)
print(sal)
[ 45000. 54000. 32000. 59000. 37000. 18000. 29000. 74000. 38000. 65000. 41000. 26000. 83000. 45000. 68000. 17000. 36000. 6000. 24000. 12000. 47000. 25000. 2000. 79000. 1000. 56000. 62000. 57000. 15000. 41000. 49000. 47000. 23000. 29000. 74000. 29000. 89000. 41000. 68000. 17000.]
min_sal = sal.min()
max_sal = sal.max()
print min_sal, max_sal
1000.0 89000.0
visit_avg = np.array(vstable[:,5], dtype=float)
rentals = np.array(vstable[:,4], dtype=float)
norm_sal = [(x-min_sal)/(max_sal-min_sal) for x in sal]
print norm_sal
[0.5, 0.60227272727272729, 0.35227272727272729, 0.65909090909090906, 0.40909090909090912, 0.19318181818181818, 0.31818181818181818, 0.82954545454545459, 0.42045454545454547, 0.72727272727272729, 0.45454545454545453, 0.28409090909090912, 0.93181818181818177, 0.5, 0.76136363636363635, 0.18181818181818182, 0.39772727272727271, 0.056818181818181816, 0.26136363636363635, 0.125, 0.52272727272727271, 0.27272727272727271, 0.011363636363636364, 0.88636363636363635, 0.0, 0.625, 0.69318181818181823, 0.63636363636363635, 0.15909090909090909, 0.45454545454545453, 0.54545454545454541, 0.52272727272727271, 0.25, 0.31818181818181818, 0.82954545454545459, 0.31818181818181818, 1.0, 0.45454545454545453, 0.76136363636363635, 0.18181818181818182]
# Print floats with 4 decimals, without scientific notation, 80 chars per line.
np.set_printoptions(suppress=True, linewidth=80, precision=4)

# Same min-max scaling as a single vectorized expression over the array.
sal_range = max_sal - min_sal
sal_shifted = sal - min_sal
norm_sal = sal_shifted / sal_range
print(norm_sal)
[ 0.5 0.6023 0.3523 0.6591 0.4091 0.1932 0.3182 0.8295 0.4205 0.7273 0.4545 0.2841 0.9318 0.5 0.7614 0.1818 0.3977 0.0568 0.2614 0.125 0.5227 0.2727 0.0114 0.8864 0. 0.625 0.6932 0.6364 0.1591 0.4545 0.5455 0.5227 0.25 0.3182 0.8295 0.3182 1. 0.4545 0.7614 0.1818]
age_mean = age.mean()
age_std = age.std()
print age_mean, age_std
31.5 12.5916639091
# z-score standardization: subtract the mean, then divide by the std. dev.
age_centered = age - age_mean
age_znorm = age_centered / age_std
print(age_znorm)
[-0.5162 0.1191 -0.9133 3.0576 0.278 -0.9133 1.0721 -0.5162 -0.8339 0.675 -0.7545 -0.7545 1.1516 0.3574 -0.1191 -0.9927 0.278 -1.231 -0.5162 -1.231 1.6281 0.1191 -1.3104 0.278 -1.231 0.278 1.231 1.6281 -1.0721 -0.5162 1.9457 -0.1191 -0.5162 0.0397 -0.1985 -0.8339 1.1516 0.5162 0.278 -0.9927]
# Boolean mask: True where rentals is at least 30.
is_good = rentals >= 30
# Boolean indexing keeps only the rows of the table where the mask is True.
good_cust = vstable[is_good]
print(good_cust)
[['3' 'F' '32000 ' '20' '42' '1.6' 'Comedy'] ['6' 'M' '18000 ' '20' '33' '1.7' 'Action'] ['8' 'M' '74000 ' '25' '31' '2.4' 'Action'] ['11' 'F' '41000 ' '22' '48' '2.3' 'Drama'] ['15' 'M' '68000 ' '30' '36' '2.7' 'Comedy'] ['18' 'F' '6000 ' '16' '39' '1.8' 'Action'] ['19' 'F' '24000 ' '25' '41' '3.1' 'Comedy'] ['23' 'F' '2000 ' '15' '30' '2.5' 'Comedy'] ['26' 'F' '56000 ' '35' '40' '2.6' 'Action'] ['27' 'F' '62000 ' '47' '32' '3.6' 'Drama'] ['29' 'F' '15000 ' '18' '37' '2.1' 'Action'] ['35' 'M' '74000 ' '29' '43' '4.6' 'Action'] ['36' 'F' '29000 ' '21' '34' '2.3' 'Comedy'] ['40' 'M' '17000 ' '19' '32' '1.8' 'Action']]
# Independent copy of column 1 of the table.
gender = vstable[:, 1].copy()
gender
array(['M', 'F', 'F', 'F', 'M', 'M', 'F', 'M', 'M', 'F', 'F', 'F', 'M', 'M', 'M', 'M', 'M', 'F', 'F', 'M', 'F', 'M', 'F', 'F', 'M', 'F', 'F', 'M', 'F', 'M', 'F', 'M', 'M', 'F', 'M', 'F', 'M', 'M', 'F', 'M'], dtype='|S11')
# Start the 'F' indicator as all zeros, one entry per element of gender.
gen_f = np.zeros(gender.shape[0])
gen_f
array([ 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
# Set the indicator to 1 wherever gender equals 'F'.
is_female = gender == 'F'
gen_f[is_female] = 1
gen_f
array([ 0., 1., 1., 1., 0., 0., 1., 0., 0., 1., 1., 1., 0., 0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1., 0., 1., 1., 0., 1., 0., 1., 0., 0., 1., 0., 1., 0., 0., 1., 0.])
# 'M' indicator: zeros everywhere, then 1 wherever gender equals 'M'.
gen_m = np.zeros(gender.shape[0])
is_male = gender == 'M'
gen_m[is_male] = 1
gen_m
array([ 1., 0., 0., 0., 1., 1., 0., 1., 1., 0., 0., 0., 1., 1., 1., 1., 1., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0., 1., 0., 1., 0., 1., 1., 0., 1., 0., 1., 1., 0., 1.])
# Build the numeric table: each 1-D array becomes a row, and the transpose
# then turns those rows into columns.
vs_new = np.array([gen_f, gen_m, sal, age, rentals, visit_avg]).transpose()
np.set_printoptions(linewidth=80)
# Preview the first 5 rows of the new array
print(vs_new[:5])
[[ 0. 1. 45000. 25. 27. 2.5] [ 1. 0. 54000. 33. 12. 3.4] [ 1. 0. 32000. 20. 42. 1.6] [ 1. 0. 59000. 70. 16. 4.2] [ 0. 1. 37000. 35. 25. 3.2]]
f_mean,m_mean,sal_mean,age_mean,rentals_mean,visavg_mean= vs_new.mean(axis=0)
print " Gen=F Gen=M Income Age Rntls VisAvg"
print "Mean: ", f_mean,m_mean,sal_mean,age_mean,rentals_mean,visavg_mean
Gen=F Gen=M Income Age Rntls VisAvg Mean: 0.475 0.525 41500.0 31.5 26.175 2.7925
# Now that the data is in all-numeric form, we can apply techniques such as
# correlation analysis on the variables. rowvar=False tells corrcoef that the
# variables are the columns of vs_new (the same as passing the transpose).
np.corrcoef(vs_new, rowvar=False)
array([[ 1. , -1. , -0.0365, 0.161 , 0.1515, 0.2033], [-1. , 1. , 0.0365, -0.161 , -0.1515, -0.2033], [-0.0365, 0.0365, 1. , 0.6138, -0.2625, 0.4686], [ 0.161 , -0.161 , 0.6138, 1. , -0.5471, 0.6291], [ 0.1515, -0.1515, -0.2625, -0.5471, 1. , -0.2064], [ 0.2033, -0.2033, 0.4686, 0.6291, -0.2064, 1. ]])
# Write the numeric table to CSV. The context manager guarantees the file is
# flushed and closed as soon as the write finishes; previously the handle was
# left open, so buffered rows might not reach disk until the interpreter exits.
# fmt already embeds the commas between the six per-column formats.
with open("new_video_store.csv", "w") as out_file:
    np.savetxt(out_file, vs_new, fmt='%d,%d,%1.2f,%1.2f,%1.2f,%1.2f', delimiter=',')
# Reload the CSV as a structured array: names=True takes the field names from
# the header row, and the dtype tuple gives one type per column.
vs = np.genfromtxt(
    "Video_Store.csv",
    delimiter=",",
    names=True,
    dtype=(int, "|S1", float, int, int, float, "|S10"),
)
print(vs)
[(1, 'M', 45000.0, 25, 27, 2.5, 'Action') (2, 'F', 54000.0, 33, 12, 3.4, 'Drama') (3, 'F', 32000.0, 20, 42, 1.6, 'Comedy') (4, 'F', 59000.0, 70, 16, 4.2, 'Drama') (5, 'M', 37000.0, 35, 25, 3.2, 'Action') (6, 'M', 18000.0, 20, 33, 1.7, 'Action') (7, 'F', 29000.0, 45, 19, 3.8, 'Drama') (8, 'M', 74000.0, 25, 31, 2.4, 'Action') (9, 'M', 38000.0, 21, 18, 2.1, 'Comedy') (10, 'F', 65000.0, 40, 21, 3.3, 'Drama') (11, 'F', 41000.0, 22, 48, 2.3, 'Drama') (12, 'F', 26000.0, 22, 29, 2.9, 'Action') (13, 'M', 83000.0, 46, 14, 3.6, 'Comedy') (14, 'M', 45000.0, 36, 24, 2.7, 'Drama') (15, 'M', 68000.0, 30, 36, 2.7, 'Comedy') (16, 'M', 17000.0, 19, 26, 2.2, 'Action') (17, 'M', 36000.0, 35, 28, 3.5, 'Drama') (18, 'F', 6000.0, 16, 39, 1.8, 'Action') (19, 'F', 24000.0, 25, 41, 3.1, 'Comedy') (20, 'M', 12000.0, 16, 23, 2.2, 'Action') (21, 'F', 47000.0, 52, 11, 3.1, 'Drama') (22, 'M', 25000.0, 33, 16, 2.9, 'Drama') (23, 'F', 2000.0, 15, 30, 2.5, 'Comedy') (24, 'F', 79000.0, 35, 22, 3.8, 'Drama') (25, 'M', 1000.0, 16, 25, 1.4, 'Comedy') (26, 'F', 56000.0, 35, 40, 2.6, 'Action') (27, 'F', 62000.0, 47, 32, 3.6, 'Drama') (28, 'M', 57000.0, 52, 22, 4.1, 'Comedy') (29, 'F', 15000.0, 18, 37, 2.1, 'Action') (30, 'M', 41000.0, 25, 17, 1.4, 'Action') (31, 'F', 49000.0, 56, 15, 3.2, 'Comedy') (32, 'M', 47000.0, 30, 21, 3.1, 'Drama') (33, 'M', 23000.0, 25, 28, 2.7, 'Action') (34, 'F', 29000.0, 32, 19, 2.9, 'Action') (35, 'M', 74000.0, 29, 43, 4.6, 'Action') (36, 'F', 29000.0, 21, 34, 2.3, 'Comedy') (37, 'M', 89000.0, 46, 12, 1.2, 'Comedy') (38, 'M', 41000.0, 38, 20, 3.3, 'Drama') (39, 'F', 68000.0, 35, 19, 3.9, 'Comedy') (40, 'M', 17000.0, 19, 32, 1.8, 'Action')]
# Show the record layout (field names and types) of the structured array.
vs.dtype
dtype([('ID', '<i4'), ('Gender', 'S1'), ('Income', '<f8'), ('Age', '<i4'), ('Rentals', '<i4'), ('AvgPerVisit', '<f8'), ('Genre', 'S10')])
# Fields of a structured array are selected by name.
print(vs['Gender'])
['M' 'F' 'F' 'F' 'M' 'M' 'F' 'M' 'M' 'F' 'F' 'F' 'M' 'M' 'M' 'M' 'M' 'F' 'F' 'M' 'F' 'M' 'F' 'F' 'M' 'F' 'F' 'M' 'F' 'M' 'F' 'M' 'M' 'F' 'M' 'F' 'M' 'M' 'F' 'M']
# Select the 'Income' field by name.
print(vs['Income'])
[ 45000. 54000. 32000. 59000. 37000. 18000. 29000. 74000. 38000. 65000. 41000. 26000. 83000. 45000. 68000. 17000. 36000. 6000. 24000. 12000. 47000. 25000. 2000. 79000. 1000. 56000. 62000. 57000. 15000. 41000. 49000. 47000. 23000. 29000. 74000. 29000. 89000. 41000. 68000. 17000.]
# Count the records whose Genre is 'Action' by summing the boolean mask.
print((vs['Genre'] == 'Action').sum())
15
# Mask of records with at least 30 rentals, applied to the structured array.
is_good = vs['Rentals'] >= 30
good_cust = vs[is_good]
good_cust
array([(3, 'F', 32000.0, 20, 42, 1.6, 'Comedy'), (6, 'M', 18000.0, 20, 33, 1.7, 'Action'), (8, 'M', 74000.0, 25, 31, 2.4, 'Action'), (11, 'F', 41000.0, 22, 48, 2.3, 'Drama'), (15, 'M', 68000.0, 30, 36, 2.7, 'Comedy'), (18, 'F', 6000.0, 16, 39, 1.8, 'Action'), (19, 'F', 24000.0, 25, 41, 3.1, 'Comedy'), (23, 'F', 2000.0, 15, 30, 2.5, 'Comedy'), (26, 'F', 56000.0, 35, 40, 2.6, 'Action'), (27, 'F', 62000.0, 47, 32, 3.6, 'Drama'), (29, 'F', 15000.0, 18, 37, 2.1, 'Action'), (35, 'M', 74000.0, 29, 43, 4.6, 'Action'), (36, 'F', 29000.0, 21, 34, 2.3, 'Comedy'), (40, 'M', 17000.0, 19, 32, 1.8, 'Action')], dtype=[('ID', '<i4'), ('Gender', 'S1'), ('Income', '<f8'), ('Age', '<i4'), ('Rentals', '<i4'), ('AvgPerVisit', '<f8'), ('Genre', 'S10')])
print "Min Rentals: ", good_cust['Rentals'].min()
print "Max Rentals: ", good_cust['Rentals'].max()
print "Rentals Mean: ", good_cust['Rentals'].mean()
print "Rentals Median: ", np.median(good_cust['Rentals'])
print "Rentals Std. Dev.: ", good_cust['Rentals'].std()
Min Rentals: 30 Max Rentals: 48 Rentals Mean: 37.0 Rentals Median: 36.5 Rentals Std. Dev.: 5.15474815791
# Load the CSV again without its first column: usecols keeps columns 1-6, so
# the dtype tuple lists six types instead of seven.
vs_nid = np.genfromtxt(
    "Video_Store.csv",
    delimiter=",",
    usecols=(1, 2, 3, 4, 5, 6),
    names=True,
    dtype=("|S1", float, int, int, float, "|S10"),
)
vs_nid[:5]
array([('M', 45000.0, 25, 27, 2.5, 'Action'), ('F', 54000.0, 33, 12, 3.4, 'Drama'), ('F', 32000.0, 20, 42, 1.6, 'Comedy'), ('F', 59000.0, 70, 16, 4.2, 'Drama'), ('M', 37000.0, 35, 25, 3.2, 'Action')], dtype=[('Gender', 'S1'), ('Income', '<f8'), ('Age', '<i4'), ('Rentals', '<i4'), ('AvgPerVisit', '<f8'), ('Genre', 'S10')])
# Turn every record into a {field name: value} dictionary.
names = vs_nid.dtype.names
vs_dict = []
for record in vs_nid:
    vs_dict.append(dict(zip(names, record)))
print(vs_dict[0])
{'Gender': 'M', 'Age': 25, 'AvgPerVisit': 2.5, 'Income': 45000.0, 'Genre': 'Action', 'Rentals': 27}
from sklearn.feature_extraction import DictVectorizer
# Widen the print width so each encoded row fits on one line.
np.set_printoptions(linewidth=100)
# Fit the vectorizer on the list of record dictionaries and display the
# resulting matrix as a dense array.
vs_vec = DictVectorizer()
vs_encoded = vs_vec.fit_transform(vs_dict)
vs_encoded.toarray()
array([[ 25. , 2.5, 0. , 1. , 1. , 0. , 0. , 45000. , 27. ], [ 33. , 3.4, 1. , 0. , 0. , 0. , 1. , 54000. , 12. ], [ 20. , 1.6, 1. , 0. , 0. , 1. , 0. , 32000. , 42. ], [ 70. , 4.2, 1. , 0. , 0. , 0. , 1. , 59000. , 16. ], [ 35. , 3.2, 0. , 1. , 1. , 0. , 0. , 37000. , 25. ], [ 20. , 1.7, 0. , 1. , 1. , 0. , 0. , 18000. , 33. ], [ 45. , 3.8, 1. , 0. , 0. , 0. , 1. , 29000. , 19. ], [ 25. , 2.4, 0. , 1. , 1. , 0. , 0. , 74000. , 31. ], [ 21. , 2.1, 0. , 1. , 0. , 1. , 0. , 38000. , 18. ], [ 40. , 3.3, 1. , 0. , 0. , 0. , 1. , 65000. , 21. ], [ 22. , 2.3, 1. , 0. , 0. , 0. , 1. , 41000. , 48. ], [ 22. , 2.9, 1. , 0. , 1. , 0. , 0. , 26000. , 29. ], [ 46. , 3.6, 0. , 1. , 0. , 1. , 0. , 83000. , 14. ], [ 36. , 2.7, 0. , 1. , 0. , 0. , 1. , 45000. , 24. ], [ 30. , 2.7, 0. , 1. , 0. , 1. , 0. , 68000. , 36. ], [ 19. , 2.2, 0. , 1. , 1. , 0. , 0. , 17000. , 26. ], [ 35. , 3.5, 0. , 1. , 0. , 0. , 1. , 36000. , 28. ], [ 16. , 1.8, 1. , 0. , 1. , 0. , 0. , 6000. , 39. ], [ 25. , 3.1, 1. , 0. , 0. , 1. , 0. , 24000. , 41. ], [ 16. , 2.2, 0. , 1. , 1. , 0. , 0. , 12000. , 23. ], [ 52. , 3.1, 1. , 0. , 0. , 0. , 1. , 47000. , 11. ], [ 33. , 2.9, 0. , 1. , 0. , 0. , 1. , 25000. , 16. ], [ 15. , 2.5, 1. , 0. , 0. , 1. , 0. , 2000. , 30. ], [ 35. , 3.8, 1. , 0. , 0. , 0. , 1. , 79000. , 22. ], [ 16. , 1.4, 0. , 1. , 0. , 1. , 0. , 1000. , 25. ], [ 35. , 2.6, 1. , 0. , 1. , 0. , 0. , 56000. , 40. ], [ 47. , 3.6, 1. , 0. , 0. , 0. , 1. , 62000. , 32. ], [ 52. , 4.1, 0. , 1. , 0. , 1. , 0. , 57000. , 22. ], [ 18. , 2.1, 1. , 0. , 1. , 0. , 0. , 15000. , 37. ], [ 25. , 1.4, 0. , 1. , 1. , 0. , 0. , 41000. , 17. ], [ 56. , 3.2, 1. , 0. , 0. , 1. , 0. , 49000. , 15. ], [ 30. , 3.1, 0. , 1. , 0. , 0. , 1. , 47000. , 21. ], [ 25. , 2.7, 0. , 1. , 1. , 0. , 0. , 23000. , 28. ], [ 32. , 2.9, 1. , 0. , 1. , 0. , 0. , 29000. , 19. ], [ 29. , 4.6, 0. , 1. , 1. , 0. , 0. , 74000. , 43. ], [ 21. , 2.3, 1. , 0. , 0. , 1. , 0. , 29000. , 34. ], [ 46. , 1.2, 0. , 1. , 0. , 1. , 0. , 89000. , 12. 
], [ 38. , 3.3, 0. , 1. , 0. , 0. , 1. , 41000. , 20. ], [ 35. , 3.9, 1. , 0. , 0. , 1. , 0. , 68000. , 19. ], [ 19. , 1.8, 0. , 1. , 1. , 0. , 0. , 17000. , 32. ]])
# List the feature names learned by the vectorizer; they line up with the
# columns of the array displayed above.
vs_vec.get_feature_names()
['Age', 'AvgPerVisit', 'Gender=F', 'Gender=M', 'Genre=Action', 'Genre=Comedy', 'Genre=Drama', 'Income', 'Rentals']
import matplotlib.pyplot as plt
%matplotlib inline
# Histogram of the Income field using 9 bins and half-transparent bars.
plt.hist(vs['Income'], alpha=0.5, bins=9)
plt.title('Histogram of Income')
plt.ylabel('Count')
plt.xlabel('Income')
# Fix the visible range: x from 0 to 100000, y from 0 to 10.
plt.axis([0, 100000, 0, 10])
plt.grid(True)
plt.show()
# Grouped bar chart: customer counts per genre, split by gender.
# First we need the counts for males and females across different genres.
# NOTE(review): these hard-coded counts add up to 54 customers, while the
# Video_Store.csv table printed earlier in this file has 40 rows -- confirm
# which data set they are meant to describe.
m_counts = [14, 6, 8]  # counts of Action, Comedy, Drama for male custs.
f_counts = [8, 6, 12]  # counts of Action, Comedy, Drama for female custs.

N = len(f_counts)
ind = np.arange(N)  # the x locations for the groups
ind = ind + 0.15    # shift every group slightly to the right
width = 0.35        # the width of the bars

fig, ax = plt.subplots()
# Draw each series exactly once. The bars used to be drawn a second time with
# plt.bar on the same axes, which only stacked identical duplicates on top.
rects1 = ax.bar(ind, f_counts, width, color='b')
rects2 = ax.bar(ind + width, m_counts, width, color='r')

ax.set_ylabel('Counts')
ax.set_ybound(upper=16)
ax.set_title('Counts by Genre and Gender')
ax.set_xticks(ind + width)
ax.set_xticklabels(('Action', 'Comedy', 'Drama'))
# One legend entry per series, using the first bar of each as the handle.
ax.legend((rects1[0], rects2[0]), ('Female', 'Male'))
plt.show()
# plt.savefig("figure.pdf")
# Scatter plot of Age against Income on a single 5x4-inch figure.
# plt.subplots creates the figure and its one Axes (1 row, 1 column) together.
fig, ax = plt.subplots(figsize=(5, 4))
# Plot the data as blue star markers.
ax.scatter(vs['Age'], vs['Income'], marker="*", color="blue")
# Title and axis labels.
ax.set_title("Age VS. Income")
ax.set_xlabel("Age")
ax.set_ylabel("Income")
# Produce an image.
# fig.savefig("scatterplot.png")
plt.show()