In [1]:

from matplotlib import pyplot as plt
import csv
from collections import Counter
import pandas as pd
import numpy as np

In [2]:

# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*"
# and later releases dropped the old names, so pick whichever name this
# installation actually provides instead of hard-coding one of them.
_white_style = next(
    (name for name in ('seaborn-v0_8-white', 'seaborn-white')
     if name in plt.style.available),
    'default',  # neither seaborn variant is installed: fall back to the stock style
)
plt.style.use(_white_style)

# Default size (in inches) for every figure created below.
plt.rc('figure', figsize=(10, 6))

In [3]:

# Ages shared by all three salary series (one salary entry per age).
ages_x = list(range(25, 36))

# Median salary by age: all developers.
dev_y = [
    38496, 42000, 46752, 49320, 53200, 56000,
    62316, 64928, 67317, 68748, 73752,
]

# Median salary by age: Python developers.
py_dev_y = [
    45372, 48876, 53850, 57287, 63016, 65998,
    70003, 70000, 71496, 75370, 83640,
]

# Median salary by age: JavaScript developers.
js_dev_y = [
    37810, 43515, 46823, 49293, 53437, 56373,
    62375, 66674, 68745, 68746, 74583,
]

In [4]:

# Simplest case: one vertical bar per age for the all-developer series.
plt.bar(
    ages_x,
    dev_y,
    color='#444444',
    label='All Devs',
)

Out[4]:

<BarContainer object of 11 artists>

In [5]:

# Overlaid bars: every series is drawn at the same x positions, so each
# plt.bar call paints on top of the previous one (the bars are not offset
# or stacked on each other).
overlay_series = (
    (dev_y, '#444444', 'All Devs'),
    (py_dev_y, '#008df5', 'Python'),
    (js_dev_y, '#e5ae38', 'JavaScript'),
)
for salaries, bar_color, series_label in overlay_series:
    plt.bar(ages_x, salaries, color=bar_color, label=series_label)

plt.title("Median Salary (USD) by Age")
plt.xlabel("Ages")
plt.ylabel("Median Salary (USD)")
plt.legend()

# tight_layout adjusts the subplot padding automatically.
plt.tight_layout()
plt.show()

In [6]:

# Grouped (side-by-side) bars: plot against integer slots 0..n-1 and shift
# each series by one bar width so the three bars of an age sit next to each
# other instead of on top of each other.
bar_width = 0.25
x_indexes = np.arange(len(ages_x))

grouped_series = (
    (-bar_width, dev_y, '#444444', 'All Devs'),
    (0, py_dev_y, '#008df5', 'Python'),
    (bar_width, js_dev_y, '#e5ae38', 'JavaScript'),
)
for shift, salaries, bar_color, series_label in grouped_series:
    plt.bar(x_indexes + shift, salaries, width=bar_width,
            color=bar_color, label=series_label)

# The bars were positioned by slot index, so the axis would show 0..n-1;
# relabel the ticks with the actual ages.
plt.xticks(ticks=x_indexes, labels=ages_x)

plt.title("Median Salary (USD) by Age")
plt.xlabel("Ages")
plt.ylabel("Median Salary (USD)")

plt.legend()
plt.tight_layout()
plt.show()

In [7]:

# horizontal bar charts

In [8]:

# Counter demo: tally one batch of items, then fold a second batch into the
# same counter with update(), printing the counts after each step.
first_batch = ['Python', 'Javascript']
second_batch = ['Python', 'Html']

c = Counter(first_batch)
print(c)

c.update(second_batch)
print(c)

Counter({'Python': 1, 'Javascript': 1})
Counter({'Python': 2, 'Javascript': 1, 'Html': 1})

In [9]:

# Count how often each language appears in the 'LanguagesWorkedWith' column
# of examples/data.csv, reading the file with the csv module. Each cell holds
# a ';'-separated list of languages.
language_counter = Counter()

# newline='' lets the csv module handle line endings itself, as its
# documentation recommends for files passed to a reader.
with open(r'examples/data.csv', newline='') as csv_file:
    csv_reader = csv.DictReader(csv_file)

    for row in csv_reader:
        response = row['LanguagesWorkedWith']
        # A blank or missing answer would otherwise be tallied as a language
        # named '' (or fail on None for a short row), so skip it.
        if response:
            language_counter.update(response.split(';'))

# Split the 15 most common (language, count) pairs into two parallel lists,
# ordered from most to least common, ready for plotting.
top_languages = language_counter.most_common(15)
languages = [lan for lan, _ in top_languages]
popularity = [pop for _, pop in top_languages]

print(languages)
print(popularity)
    

['JavaScript', 'HTML/CSS', 'SQL', 'Python', 'Java', 'Bash/Shell/PowerShell', 'C#', 'PHP', 'C++', 'TypeScript', 'C', 'Other(s):', 'Ruby', 'Go', 'Assembly']
[59219, 55466, 47544, 36443, 35917, 31991, 27097, 23030, 20524, 18523, 18017, 7920, 7331, 7201, 5833]

In [10]:

# Horizontal bar chart of the most common languages.
#
# barh places the first entry at the bottom of the axis, so the lists are
# plotted in reverse to put the most common language at the top. Reversed
# copies (slices) are used instead of list.reverse(): mutating the lists in
# place would flip the chart order every time this cell is re-run.
plt.barh(languages[::-1], popularity[::-1])

plt.xlabel("Number of people who use")
plt.title("Most Popular Languages")

plt.tight_layout()
plt.show()

In [11]:

# Same language tally as the csv-module version, but loading the file with pandas.
data = pd.read_csv(r'examples/data.csv')
ids = data['Responder_id']  # not used in the cells shown here; kept for any later use
lang_responses = data['LanguagesWorkedWith']

language_counter = Counter()

# pandas reads empty fields as NaN (a float), which has no .split();
# drop missing answers so one blank row cannot abort the whole count.
for response in lang_responses.dropna():
    language_counter.update(response.split(';'))

# Split the 15 most common (language, count) pairs into two parallel lists,
# ordered from most to least common, ready for plotting.
top_languages = language_counter.most_common(15)
languages = [lan for lan, _ in top_languages]
popularity = [pop for _, pop in top_languages]

In [12]:

# Horizontal bar chart of the most common languages (pandas-loaded data).
#
# barh places the first entry at the bottom of the axis, so the lists are
# plotted in reverse to put the most common language at the top. Reversed
# copies (slices) are used instead of list.reverse(): mutating the lists in
# place would flip the chart order every time this cell is re-run.
plt.barh(languages[::-1], popularity[::-1])

plt.xlabel("Number of people who use")
plt.title("Most Popular Languages")

plt.tight_layout()
plt.show()