from matplotlib import pyplot as plt
import csv
from collections import Counter
import pandas as pd
import numpy as np
# Global figure styling.
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so pick whichever one is installed.
_white_style = (
    'seaborn-v0_8-white'
    if 'seaborn-v0_8-white' in plt.style.available
    else 'seaborn-white'
)
plt.style.use(_white_style)
# Default size (in inches) for every figure created below.
plt.rc('figure', figsize=(10, 6))
# Median salaries by age, one row per age:
# (age, all developers, Python developers, JavaScript developers)
_salary_rows = [
    (25, 38496, 45372, 37810),
    (26, 42000, 48876, 43515),
    (27, 46752, 53850, 46823),
    (28, 49320, 57287, 49293),
    (29, 53200, 63016, 53437),
    (30, 56000, 65998, 56373),
    (31, 62316, 70003, 62375),
    (32, 64928, 70000, 66674),
    (33, 67317, 71496, 68745),
    (34, 68748, 75370, 68746),
    (35, 73752, 83640, 74583),
]
# Transpose the table into the four parallel lists used by the charts below.
ages_x, dev_y, py_dev_y, js_dev_y = map(list, zip(*_salary_rows))
# Single series: median salary of all developers by age.
plt.bar(ages_x, dev_y, color='#444444', label='All Devs')
# Notebook output of the call above: <BarContainer object of 11 artists>
# Show this chart by itself, as the other sections do. Otherwise these bars
# stay on the current figure and the next chart's legend lists 'All Devs' twice.
plt.show()
# Overlaid bars: every series is drawn at the same x positions (ages_x),
# so each later series is painted on top of the earlier ones.
for salaries, colour, name in (
    (dev_y, '#444444', 'All Devs'),
    (py_dev_y, '#008df5', 'Python'),
    (js_dev_y, '#e5ae38', 'JavaScript'),
):
    plt.bar(ages_x, salaries, color=colour, label=name)
plt.title("Median Salary (USD) by Age")
plt.xlabel("Ages")
plt.ylabel("Median Salary (USD)")
plt.legend()
# Adjust subplot padding so labels and title fit inside the figure.
plt.tight_layout()
plt.show()
# side by side
# Grouped bars: place the groups at 0..n-1 and shift each series by one bar
# width so the three bars of a group sit next to each other.
x_indexes = np.arange(len(ages_x))
bar_width = 0.25
grouped_series = (
    (x_indexes - bar_width, dev_y, '#444444', 'All Devs'),
    (x_indexes, py_dev_y, '#008df5', 'Python'),
    (x_indexes + bar_width, js_dev_y, '#e5ae38', 'JavaScript'),
)
for positions, salaries, colour, name in grouped_series:
    plt.bar(positions, salaries, width=bar_width, color=colour, label=name)
# The bars are positioned by index, so relabel the ticks with the real ages.
plt.xticks(ticks=x_indexes, labels=ages_x)
plt.title("Median Salary (USD) by Age")
plt.xlabel("Ages")
plt.ylabel("Median Salary (USD)")
plt.legend()
plt.tight_layout()
plt.show()
# horizontal bar charts
# Counter example: update() adds the new items to the existing tallies.
c = Counter(['Python', 'Javascript'])
print(c)  # Counter({'Python': 1, 'Javascript': 1})
c.update(['Python', 'Html'])
print(c)  # Counter({'Python': 2, 'Javascript': 1, 'Html': 1})
# using csv module to read the data
# Tally how many responses mention each language; a response lists its
# languages in one ';'-separated field.
language_counter = Counter()
# newline='' is what the csv module asks for on file objects; UTF-8 matches
# the default encoding pandas.read_csv uses for this same file further down.
with open(r'examples/data.csv', newline='', encoding='utf-8') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    for row in csv_reader:
        answer = row['LanguagesWorkedWith']
        # Skip empty/missing answers so '' is not counted as a language.
        if answer:
            language_counter.update(answer.split(';'))

# Split the 15 most common (language, count) pairs into parallel lists.
top_languages = language_counter.most_common(15)
languages = [lan for lan, _ in top_languages]
popularity = [pop for _, pop in top_languages]
print(languages)
print(popularity)
# Output recorded from the original notebook run:
# ['JavaScript', 'HTML/CSS', 'SQL', 'Python', 'Java', 'Bash/Shell/PowerShell', 'C#', 'PHP', 'C++', 'TypeScript', 'C', 'Other(s):', 'Ruby', 'Go', 'Assembly']
# [59219, 55466, 47544, 36443, 35917, 31991, 27097, 23030, 20524, 18523, 18017, 7920, 7331, 7201, 5833]
# plotting
# barh draws the first entry at the bottom, so flip both lists in place to
# put the most common language at the top of the chart.
for series in (languages, popularity):
    series.reverse()
plt.barh(y=languages, width=popularity)
plt.title("Most Popular Languages")
plt.xlabel("Number of people who use")
plt.tight_layout()
plt.show()
# using Pandas
# Same tally as the csv-module version, reading the file with pandas instead.
data = pd.read_csv(r'examples/data.csv')
ids = data['Responder_id']
lang_responses = data['LanguagesWorkedWith']
language_counter = Counter()
# pandas loads empty cells as NaN (a float), which has no .split(); drop them.
for response in lang_responses.dropna():
    language_counter.update(response.split(';'))

# Split the 15 most common (language, count) pairs into parallel lists.
top_languages = language_counter.most_common(15)
languages = [lan for lan, _ in top_languages]
popularity = [pop for _, pop in top_languages]
# plotting
# Flip both lists in place so the most common language ends up as the top
# bar (barh places the first entry at the bottom).
for series in (languages, popularity):
    series.reverse()
plt.barh(y=languages, width=popularity)
plt.title("Most Popular Languages")
plt.xlabel("Number of people who use")
plt.tight_layout()
plt.show()