pwd
'C:\\Users\\bmobashe\\Jupyter Notebooks'
cd ..
C:\Users\bmobashe
cd C:\Users\bmobashe\Desktop\Class\Data
C:\Users\bmobashe\Desktop\Class\Data
more populations.txt
# Parse the tab-separated population file into poptable, a list of records.
# In each record the first column is converted to int and the next three to
# float; every record is echoed as it is read.
with open("populations.txt", "r") as datafile:  # closed automatically, even if parsing fails
    # first line of the file contains labels for column names
    labels_line = datafile.readline().strip()
    poptable = []
    total = 0
    # iterate the file lazily instead of materialising it with readlines()
    for line in datafile:
        line = line.strip()
        if not line:
            # tolerate blank lines (e.g. a trailing empty line at end of file)
            continue
        row = line.split("\t")
        record = [int(row[0]), float(row[1]), float(row[2]), float(row[3])]
        print(record)
        poptable.append(record)
        total += 1
# number of data rows read (the header line is not counted)
print(total)
[1900, 30000.0, 4000.0, 48300.0] [1901, 47200.0, 6100.0, 48200.0] [1902, 70200.0, 9800.0, 41500.0] [1903, 77400.0, 35200.0, 38200.0] [1904, 36300.0, 59400.0, 40600.0] [1905, 20600.0, 41700.0, 39800.0] [1906, 18100.0, 19000.0, 38600.0] [1907, 21400.0, 13000.0, 42300.0] [1908, 22000.0, 8300.0, 44500.0] [1909, 25400.0, 9100.0, 42100.0] [1910, 27100.0, 7400.0, 46000.0] [1911, 40300.0, 8000.0, 46800.0] [1912, 57000.0, 12300.0, 43800.0] [1913, 76600.0, 19500.0, 40900.0] [1914, 52300.0, 45700.0, 39400.0] [1915, 19500.0, 51100.0, 39000.0] [1916, 11200.0, 29700.0, 36700.0] [1917, 7600.0, 15800.0, 41800.0] [1918, 14600.0, 9700.0, 43300.0] [1919, 16200.0, 10100.0, 41300.0] [1920, 24700.0, 8600.0, 47300.0] 21
# Echo the whole parsed table (a list of per-row records) as the cell result
poptable
[[1900, 30000.0, 4000.0, 48300.0], [1901, 47200.0, 6100.0, 48200.0], [1902, 70200.0, 9800.0, 41500.0], [1903, 77400.0, 35200.0, 38200.0], [1904, 36300.0, 59400.0, 40600.0], [1905, 20600.0, 41700.0, 39800.0], [1906, 18100.0, 19000.0, 38600.0], [1907, 21400.0, 13000.0, 42300.0], [1908, 22000.0, 8300.0, 44500.0], [1909, 25400.0, 9100.0, 42100.0], [1910, 27100.0, 7400.0, 46000.0], [1911, 40300.0, 8000.0, 46800.0], [1912, 57000.0, 12300.0, 43800.0], [1913, 76600.0, 19500.0, 40900.0], [1914, 52300.0, 45700.0, 39400.0], [1915, 19500.0, 51100.0, 39000.0], [1916, 11200.0, 29700.0, 36700.0], [1917, 7600.0, 15800.0, 41800.0], [1918, 14600.0, 9700.0, 43300.0], [1919, 16200.0, 10100.0, 41300.0], [1920, 24700.0, 8600.0, 47300.0]]
# Show the first 5 rows of poptable: the slice runs from index 0 up to (but not including) index 5
poptable[0:5]
[[1900, 30000.0, 4000.0, 48300.0], [1901, 47200.0, 6100.0, 48200.0], [1902, 70200.0, 9800.0, 41500.0], [1903, 77400.0, 35200.0, 38200.0], [1904, 36300.0, 59400.0, 40600.0]]
# Build one year -> count lookup table per species from the parsed rows.
# Column 0 of each row is the key; columns 1, 2 and 3 are the three counts.
hares, lynxes, carrots = {}, {}, {}
for row in poptable:
    yr = row[0]
    hares[yr], lynxes[yr], carrots[yr] = row[1], row[2], row[3]
# View the hares mapping as a list of (year, count) pairs
list(hares.items())
[(1900, 30000.0), (1901, 47200.0), (1902, 70200.0), (1903, 77400.0), (1904, 36300.0), (1905, 20600.0), (1906, 18100.0), (1907, 21400.0), (1908, 22000.0), (1909, 25400.0), (1910, 27100.0), (1911, 40300.0), (1912, 57000.0), (1913, 76600.0), (1914, 52300.0), (1915, 19500.0), (1916, 11200.0), (1917, 7600.0), (1918, 14600.0), (1919, 16200.0), (1920, 24700.0)]
# Look up the hare count for a single year by its key
print(hares[1903])
77400.0
# Years in which the hare population was greater than 50K.
# A standard Python list comprehension over the (year, count) pairs does the filtering.
hares_above_50k = [yr for yr, count in hares.items() if count > 50000.0]
hares_above_50k
[1902, 1903, 1912, 1913, 1914]
# Find the year(s) in which the hare population reached its maximum value.
# The peak is computed once up front instead of once per year inside the
# comprehension; default=None keeps an empty table from raising ValueError.
peak_hares = max(hares.values(), default=None)
maxhares = [yr for yr, count in hares.items() if count == peak_hares]
# Print each peak year next to its hare count
for yr in maxhares:
    print(yr, hares[yr])
1903 77400.0
import numpy as np
# Convert the list-of-lists table into a 2-D NumPy array (one row per record).
# The int first column and the float counts end up as floats in the printed array.
pop = np.array(poptable)
print(pop)
[[ 1900. 30000. 4000. 48300.] [ 1901. 47200. 6100. 48200.] [ 1902. 70200. 9800. 41500.] [ 1903. 77400. 35200. 38200.] [ 1904. 36300. 59400. 40600.] [ 1905. 20600. 41700. 39800.] [ 1906. 18100. 19000. 38600.] [ 1907. 21400. 13000. 42300.] [ 1908. 22000. 8300. 44500.] [ 1909. 25400. 9100. 42100.] [ 1910. 27100. 7400. 46000.] [ 1911. 40300. 8000. 46800.] [ 1912. 57000. 12300. 43800.] [ 1913. 76600. 19500. 40900.] [ 1914. 52300. 45700. 39400.] [ 1915. 19500. 51100. 39000.] [ 1916. 11200. 29700. 36700.] [ 1917. 7600. 15800. 41800.] [ 1918. 14600. 9700. 43300.] [ 1919. 16200. 10100. 41300.] [ 1920. 24700. 8600. 47300.]]
# Load the same file directly with NumPy, skipping the first (header) line.
# NOTE: this rebinds poptable from the Python list built earlier to an ndarray.
poptable = np.loadtxt('populations.txt', skiprows=1)
print(poptable)
[[ 1900. 30000. 4000. 48300.] [ 1901. 47200. 6100. 48200.] [ 1902. 70200. 9800. 41500.] [ 1903. 77400. 35200. 38200.] [ 1904. 36300. 59400. 40600.] [ 1905. 20600. 41700. 39800.] [ 1906. 18100. 19000. 38600.] [ 1907. 21400. 13000. 42300.] [ 1908. 22000. 8300. 44500.] [ 1909. 25400. 9100. 42100.] [ 1910. 27100. 7400. 46000.] [ 1911. 40300. 8000. 46800.] [ 1912. 57000. 12300. 43800.] [ 1913. 76600. 19500. 40900.] [ 1914. 52300. 45700. 39400.] [ 1915. 19500. 51100. 39000.] [ 1916. 11200. 29700. 36700.] [ 1917. 7600. 15800. 41800.] [ 1918. 14600. 9700. 43300.] [ 1919. 16200. 10100. 41300.] [ 1920. 24700. 8600. 47300.]]
# (number of rows, number of columns) of the loaded table
poptable.shape
(21L, 4L)
# Transposing turns each column into a row, so the four columns unpack into
# four separate 1-D arrays.
# NOTE: hares, lynxes and carrots are rebound here, replacing the dictionaries
# of the same names built earlier.
year, hares, lynxes, carrots = poptable.T
# Allow up to 100 characters per printed line so arrays wrap less often
np.set_printoptions(linewidth=100)
print(year)
print(hares)
print("Mean Hare Population: ", hares.mean())
[1900. 1901. 1902. 1903. 1904. 1905. 1906. 1907. 1908. 1909. 1910. 1911. 1912. 1913. 1914. 1915. 1916. 1917. 1918. 1919. 1920.] [30000. 47200. 70200. 77400. 36300. 20600. 18100. 21400. 22000. 25400. 27100. 40300. 57000. 76600. 52300. 19500. 11200. 7600. 14600. 16200. 24700.] Mean Hare Population: 34080.95238095238
# Find all years in which the population of at least one species is above 50K.
# The first (year) column is sliced off so that only species counts take part
# in the comparison.
# axis=1 reduces across the columns of each row, giving one boolean per year.
above_50k = np.any(poptable[:, 1:] > 50000, axis=1)
print(above_50k)
# Boolean-mask the year array to list the matching years
print(year[above_50k])
[False False True True True False False False False False False False True True True True False False False False False] [1902. 1903. 1904. 1912. 1913. 1914. 1915.]
# Keep only the species columns so the statistics below ignore the year
pop_no_year = poptable[:,1:] # drop the first column ("Year")
print(" Hares Lynxes Carrots")
# axis=0 aggregates down the rows, producing one value per column
print("Mean:", pop_no_year.mean(axis=0))
print("Std: ", pop_no_year.std(axis=0))
Hares Lynxes Carrots Mean: [34080.95238095 20166.66666667 42400. ] Std: [20897.90645809 16254.59153691 3322.50622558]
# Find the row indices at which each population was at its maximum
j_max_years = np.argmax(pop_no_year, axis=0) # ranging over rows for each column
print("Indecies for the maximums:", j_max_years)
print(" Hares Lynxes Carrots")
# Index the year array with those row indices to get the peak year per column
print("Max. year:", year[j_max_years])
Indecies for the maximums: [3 4 0] Hares Lynxes Carrots Max. year: [1903. 1904. 1900.]
# Ranging over columns for each row, find the species with the highest population in each year
max_species = np.argmax(pop_no_year, axis=1)
# Names in the same order as the columns of pop_no_year, used to turn column indices into labels
species = np.array(['Hare', 'Lynx', 'Carrot'])
print(max_species)
print("Max specie from 1900 to 1920:")
# Indexing the name array with the index array yields one name per year
print(species[max_species])
[2 2 0 0 1 1 2 2 2 2 2 2 0 0 0 1 2 2 2 2 2] Max specie from 1900 to 1920: ['Carrot' 'Carrot' 'Hare' 'Hare' 'Lynx' 'Lynx' 'Carrot' 'Carrot' 'Carrot' 'Carrot' 'Carrot' 'Carrot' 'Hare' 'Hare' 'Hare' 'Lynx' 'Carrot' 'Carrot' 'Carrot' 'Carrot' 'Carrot']
# Pairwise correlation coefficients between the three populations.
# np.corrcoef treats each row as one variable by default, hence the transpose:
# each column of pop_no_year becomes a row.
corr_matrix = np.corrcoef(pop_no_year.T)
print(corr_matrix)
[[ 1. 0.07189206 -0.01660378] [ 0.07189206 1. -0.68057717] [-0.01660378 -0.68057717 1. ]]
# Reinterpret the float table as a structured array with one named field per column,
# so that columns can be addressed by name instead of by position.
# Each row of four floats becomes a single four-field record.
pop_with_keys = poptable.view(dtype=[('year', 'float'), ('hares', 'float'), ('lynxes', 'float'), ('carrots', 'float')])
pop_with_keys
array([[(1900., 30000., 4000., 48300.)], [(1901., 47200., 6100., 48200.)], [(1902., 70200., 9800., 41500.)], [(1903., 77400., 35200., 38200.)], [(1904., 36300., 59400., 40600.)], [(1905., 20600., 41700., 39800.)], [(1906., 18100., 19000., 38600.)], [(1907., 21400., 13000., 42300.)], [(1908., 22000., 8300., 44500.)], [(1909., 25400., 9100., 42100.)], [(1910., 27100., 7400., 46000.)], [(1911., 40300., 8000., 46800.)], [(1912., 57000., 12300., 43800.)], [(1913., 76600., 19500., 40900.)], [(1914., 52300., 45700., 39400.)], [(1915., 19500., 51100., 39000.)], [(1916., 11200., 29700., 36700.)], [(1917., 7600., 15800., 41800.)], [(1918., 14600., 9700., 43300.)], [(1919., 16200., 10100., 41300.)], [(1920., 24700., 8600., 47300.)]], dtype=[('year', '<f8'), ('hares', '<f8'), ('lynxes', '<f8'), ('carrots', '<f8')])
# Select a single column by its field name
print(pop_with_keys['hares'])
[[30000.] [47200.] [70200.] [77400.] [36300.] [20600.] [18100.] [21400.] [22000.] [25400.] [27100.] [40300.] [57000.] [76600.] [52300.] [19500.] [11200.] [ 7600.] [14600.] [16200.] [24700.]]
# Sort the records along axis 0, ordered by the 'hares' field.
# np.sort returns a sorted copy; pop_with_keys itself is left unchanged.
sorted_by_hares = np.sort(pop_with_keys, order='hares', axis=0)
print(sorted_by_hares)
[[(1917., 7600., 15800., 41800.)] [(1916., 11200., 29700., 36700.)] [(1918., 14600., 9700., 43300.)] [(1919., 16200., 10100., 41300.)] [(1906., 18100., 19000., 38600.)] [(1915., 19500., 51100., 39000.)] [(1905., 20600., 41700., 39800.)] [(1907., 21400., 13000., 42300.)] [(1908., 22000., 8300., 44500.)] [(1920., 24700., 8600., 47300.)] [(1909., 25400., 9100., 42100.)] [(1910., 27100., 7400., 46000.)] [(1900., 30000., 4000., 48300.)] [(1904., 36300., 59400., 40600.)] [(1911., 40300., 8000., 46800.)] [(1901., 47200., 6100., 48200.)] [(1914., 52300., 45700., 39400.)] [(1912., 57000., 12300., 43800.)] [(1902., 70200., 9800., 41500.)] [(1913., 76600., 19500., 40900.)] [(1903., 77400., 35200., 38200.)]]
import matplotlib.pyplot as plt
%matplotlib inline
# Quick line plot of the hares array against the year array
plt.plot(year, hares)
[<matplotlib.lines.Line2D at 0x207d3293708>]
# Plot all three populations against the year on shared axes.
# Each line carries its own label, and plt.legend() with no arguments builds
# the legend from those labels, so the names are stated in one place only.
plt.plot(year, hares, label='Hares')
plt.plot(year, lynxes, label='Lynxes')
plt.plot(year, carrots, label='Carrots')
plt.legend()
plt.ylabel('Population')
plt.xlabel('Year')
plt.show()
# Histogram of the carrot counts, split into 8 semi-transparent bins.
plt.hist(carrots, bins=8, alpha=0.5)
plt.xlabel('Carrots')
plt.ylabel('Count')
plt.title('Histogram of Carrot Populations')
# Fix the visible range as [xmin, xmax, ymin, ymax]
plt.axis([35000, 50000, 0, 6])
plt.grid(True)
# Scatter plot of hare counts (x) against carrot counts (y), drawn with blue star markers
plt.scatter(hares, carrots, color="blue", marker="*")
plt.xlabel('Hares')
plt.ylabel('Carrots')
plt.title('Hares v. Carrots')
# Overlay a grid to make values easier to read off
plt.grid(True)
# Two side-by-side scatter plots on one wide figure (one row, two columns).
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))

# Left panel: hares against carrots, red stars.
ax1.scatter(hares, carrots, color="red", marker="*")
ax1.set_title("Hares vs. Carrots")
ax1.set_xlabel("Hare Population")
ax1.set_ylabel("Carrot Population")

# Right panel: hares against lynxes, blue triangles.
ax2.scatter(hares, lynxes, color="blue", marker="^")
ax2.set_title("Hares vs. Lynxes")
ax2.set_xlabel("Hare Population")
# Saving the image is left disabled; enable with: fig.savefig("scatterplot.png")
ax2.set_ylabel("Lynx Population")
Text(0, 0.5, 'Lynx Population')