Baby names iPython notebooks

  • By David Taylor, www.prooffreader.com
  • using data from United States Social Security Administration
  • I am making this public to give a head start to those who want to explore this dataset, so they don't have to download and format the data and the python objects used to do preliminary analysis. Please let me know if you find this helpful!

Printing graphs of names that match a list of names

List of Mythological names

Only Greek, Egyptian, Norse and Roman names are used, because there was too much confusion with others, e.g. Ora was a common girls' name with a Latin origin, but coincidentally was also a figure in Balto-Slavic mythology.

Note: this is an interactive script with repeated code (alas, not yet in functions) that shows the process of getting the data desired, not just the final result.

First pass with raw list

In [6]:
listed_path = "lists/mythological_names_eg_gk_ro_no.list"
totals_title = "Mythological names in U.S. Social Security baby names database, 1880-2013"
top_cutoff = 6

top_boys_title = "Top %d mythological boys' names from U.S. Social Security database, 1880-2013" % (top_cutoff)
top_girls_title = "Top %d mythological girls' names from U.S. Social Security database, 1880-2013" % (top_cutoff)
last_year = 2013 #change this when Social Security database is updated
save_path = "user_charts" # files created by this notebook will be saved in this directory

import time
import os
if not os.path.isdir(save_path): # creates path if it does not exist
    os.makedirs(save_path)
import math

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn # comment out if you don't have it, but it makes good-looking charts
print 'This is standard output from download_and_process.py'
%run download_and_process.py

print '--------------------\nFirst 80 characters of list:'
listed_file = open(listed_path, "r").read()
print listed_file[:80] + ' ...'
all_listed = eval(listed_file) # make sure you trust this file!
all_listed_set = set(all_listed) # to remove duplicates
all_listed = list(all_listed)
print "all_listed: list of length", len(all_listed)

# reduce names dataframe to those matching list
print '--------------------\nDataframe names filtered to those that match list'
print "%d records to begin." % (len(names))
names_listed = names[names.name.isin(all_listed)].copy()
names_listed.sort('pct_max', ascending=False, inplace=True)
print "%d records remaining." % (len(names_listed))
listed_in_df = list(names_listed.name)
print names_listed.head(10)
listed_m = list(names[(names.sex == 'M') & (names.name.isin(listed_in_df))]['name'])
listed_f = list(names[(names.sex == 'F') & (names.name.isin(listed_in_df))]['name'])

#reduce yob dataframe to those matching list
print '--------------------\nDataframe yob filtered to those that match list (count only)'
print "%d records to begin." % (len(yob))
yob_listed = yob[yob.name.isin(listed_in_df)].copy()
yob_listed.sort(['year', 'sex', 'name'], ascending=False, inplace=True)
print "%d records remaining." % (len(yob_listed))

# m and f totals
yob_listed_f_agg = pd.DataFrame(yob_listed[yob_listed.sex == 'F'].groupby('year').sum())[['births', 'pct']]
yob_listed_m_agg = pd.DataFrame(yob_listed[yob_listed.sex == 'M'].groupby('year').sum())[['births', 'pct']]
print '--------------------\nHead of total matching list per year, female'
print yob_listed_f_agg.head()

# chart of m and f totals follows; emit blank lines first to separate it
# from the text output above
print('\n')

# function to determine a nice y-axis limit a little above the maximum value
# rounds maximum y up to second-most-significant digit
def determine_y_limit(x):
    """Return a round axis limit slightly above x.

    The limit is the next multiple of one tenth of x's order of magnitude,
    i.e. x rounded up at its second-most-significant digit. The result is
    always strictly greater than x (123 -> 130, 100 -> 110).

    x -- largest value that has to fit on the axis.

    Zero, negative and NaN inputs have no order of magnitude (math.log10
    would raise), so they return 1 to give a usable, non-degenerate axis.
    """
    if not x > 0:  # also true for NaN, where every comparison is False
        return 1
    significance = int(math.floor(math.log10(x)))
    step = 10 ** (significance - 1)
    return (math.floor(x / step) + 1) * step

#data: one point per year, taken from the per-sex yearly aggregates
xf = list(yob_listed_f_agg.index)
xm = list(yob_listed_m_agg.index)
pct_f = list(yob_listed_f_agg.pct)
pct_m = list(yob_listed_m_agg.pct)

plt.figure(figsize=(16,9))
plt.plot(xf, pct_f, color="red")   # female totals
plt.plot(xm, pct_m, color="blue")  # male totals

# y axis: from zero to a round value just above the largest point of either line
plt.ylim(0, determine_y_limit(max(pct_f + pct_m)))
# x axis ends at the configured last data year instead of a literal 2013
plt.xlim(1880, last_year)

plt.title(totals_title, fontsize = 20)
plt.xlabel("Year", fontsize = 14)
plt.ylabel("% of total births of that sex", fontsize = 14)

plt.show()

#function to make dataframe for top names

def top_df(yobdf, names, sexes, start_year=1880, end_year=None):
    """Build a year-by-(name, sex) table of 'pct' values for the chosen names.

    yobdf      -- dataframe derived from yob (normally yob itself); the columns
                  'name', 'sex', 'year' and 'pct' are used.
    names      -- list of names to keep.
    sexes      -- list of length 1 (the same sex for every name) or of the same
                  length as names, pairing each name with 'F' or 'M'.
    start_year -- first year that must be present in the result.
    end_year   -- last year that must be present; defaults to the
                  notebook-level last_year.

    Returns a dataframe indexed by year in ascending order, with one
    (name, sex) column per requested pair found in yobdf. Years without data
    are inserted and every missing value is 0.

    Raises AssertionError if sexes has neither length 1 nor len(names).
    """
    assert len(sexes) == 1 or len(names) == len(sexes)
    if len(sexes) == 1:
        sexes = sexes * len(names)
    if end_year is None:
        end_year = last_year

    wanted_pairs = set(zip(names, sexes))

    # Cheap vectorised pre-filter on the name, then an exact match on the
    # (name, sex) pair so that e.g. a female 'Thor' row is dropped when only
    # the male one was requested.
    df_chart = yobdf[yobdf['name'].isin(names)].copy()
    keep = pd.Series(
        [pair in wanted_pairs for pair in zip(df_chart['name'], df_chart['sex'])],
        index=df_chart.index, dtype=bool)
    df_chart = df_chart[keep].copy()
    df_chart['temp'] = 1  # marker column, kept so the printed tail is unchanged

    print("Tail of dataframe:")
    print(df_chart.tail())

    output_df = pd.pivot_table(df_chart, values='pct', index='year',
                               columns=['name', 'sex'])

    # Insert missing years *in order*: the charts plot rows positionally
    # against a sorted run of years, so appending them at the end would
    # misalign the data.
    all_years = sorted(set(output_df.index) | set(range(start_year, end_year + 1)))
    output_df = output_df.reindex(pd.Index(all_years, name=output_df.index.name))

    return output_df.fillna(0)

# chart data for the first top_cutoff names of each sex (boys first, then girls)
listed_top_m, listed_top_f = [
    top_df(yob, listed_for_sex[:top_cutoff], [sex_code])
    for listed_for_sex, sex_code in ((listed_m, 'M'), (listed_f, 'F'))
]

#a single function to make the four different kinds of charts

def make_chart(df, form='line', title='', colors=None, smoothing=0,
               groupedlist=None, baseline='sym', png_name=''):
    """Draw one of four chart types from a year-by-(name, sex) table.

    df          -- dataframe indexed by year whose columns are (name, sex)
                   pairs, as produced by top_df.
    form        -- 'line', 'subplots_auto' (one panel per series, each with its
                   own y scale and a fill tinted by relative height),
                   'subplots_same' (one panel per series, shared y scale) or
                   'stream' (stacked plot). Any other value draws nothing.
    title       -- chart title; when empty a default title is generated.
    colors      -- list of colors; when empty or None a built-in palette of
                   dark contrasting colors is used.
    smoothing   -- when > 0, every series is replaced by a centred moving
                   average over up to `smoothing` rows on each side.
    groupedlist -- accepted for compatibility; not used by this function.
    baseline    -- passed to plt.stackplot for the 'stream' form
                   (zero, sym, wiggle, weighted_wiggle).
    png_name    -- when not empty, the chart is also saved as
                   save_path/png_name.png.

    Relies on the notebook-level names plt, determine_y_limit and save_path.
    The caller's dataframe is not modified.
    """
    # Work on a copy sorted by year so rows line up with the x values even if
    # the caller's index is out of order.
    dataframe = df.sort_index()
    x_values = list(dataframe.index)

    startyear = min(x_values)
    endyear = max(x_values)
    yearstr = '%d-%d' % (startyear, endyear)

    legend_size = 0.01

    has_male = False
    has_female = False
    max_y = 0  # largest raw (unsmoothed) value over all series
    for name, sex in dataframe.columns:
        max_y = max(max_y, dataframe[(name, sex)].max())
        final_name = name
        if sex == 'M':
            has_male = True
        if sex == 'F':
            has_female = True
        if smoothing > 0:
            raw = dataframe[(name, sex)]
            # Centred window [row - smoothing, row + smoothing], clipped at the
            # ends. The upper bound is inclusive so the window is symmetric
            # and always contains the row itself.
            dataframe[(name, sex)] = [
                raw.iloc[max(0, row - smoothing):row + smoothing + 1].mean()
                for row in range(len(raw))
            ]
    has_both = has_male and has_female
    if has_both:
        y_text = "% of births of indicated sex"
    elif has_male:
        y_text = "Percent of male births"
    else:
        y_text = "Percent of female births"

    num_series = len(dataframe.columns)

    if colors is None or len(colors) == 0:
        colors = ['#BB2114', '#0C5966', '#BA7814', '#4459AB', '#6B3838',
                  '#B8327B', '#2B947F', '#0D83B5', '#684287', '#8C962C',
                  '#92289E', '#242D7D']
        # my own list of dark contrasting colors
    num_colors = len(colors)

    if num_series > num_colors:
        print("Warning: colors will be repeated.")

    if title == '':
        if num_series == 1:
            title = "Popularity of baby name %s in U.S., %s" % (final_name, yearstr)
        else:
            title = "Popularity of baby names in U.S., %s" % (yearstr)

    if form == 'line':
        fig, ax = plt.subplots(num=None, figsize=(16, 9), dpi=300, facecolor='w', edgecolor='w')
        for counter, (name, sex) in enumerate(dataframe.columns):
            if has_both:
                label = "%s (%s)" % (name, sex)
            else:
                label = name
            ax.plot(x_values, dataframe[(name, sex)], label=label,
                    color=colors[counter % num_colors], linewidth = 3)
        ax.set_ylim(0, determine_y_limit(max_y))
        ax.set_xlim(startyear, endyear)
        ax.set_ylabel(y_text, size = 13)
        ax.set_title(title, size = 18)
        # shrink the axes a little to leave room for the legend underneath
        box = ax.get_position()
        ax.set_position([box.x0, box.y0 + box.height * legend_size,
                         box.width, box.height * (1 - legend_size)])
        legend_cols = min(5, num_series)
        ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), fancybox=True,
                  shadow=True, ncol=legend_cols)

    elif form in ('subplots_auto', 'subplots_same'):
        shared_limit = determine_y_limit(max_y)
        # squeeze=False always returns a 2-D array of axes, so a single
        # series works the same way as several.
        fig, axes = plt.subplots(num_series, 1, figsize=(12, 3.5*num_series),
                                 squeeze=False)
        if form == 'subplots_auto':
            print('Maximum alpha: %d percent' % (shared_limit))
        else:
            print('Maximum y axis: %d percent' % (shared_limit))
        for axis, (name, sex) in zip(axes[:, 0], dataframe.columns):
            series = dataframe[(name, sex)]
            sex_label = 'male' if sex == 'M' else 'female'
            label = "Percent of %s births for %s" % (sex_label, name)
            axis.plot(x_values, series, color='k')
            axis.set_xlim(startyear, endyear)
            if form == 'subplots_auto':
                # own y scale; the fill opacity shows how large this series is
                # compared with the overall maximum
                current_ymax = series.max()
                tint = 1.0 * current_ymax / shared_limit
                axis.set_ylim(0, determine_y_limit(current_ymax))
                axis.fill_between(x_values, series, color=colors[0], alpha=tint,
                                  interpolate=True)
            else:
                axis.set_ylim(0, shared_limit)
                axis.fill_between(x_values, series, color=colors[1], alpha=1,
                                  interpolate=True)
            axis.set_ylabel(label, size=11)
        plt.subplots_adjust(hspace=0.1)

    elif form == 'stream':
        plt.figure(num=None, figsize=(20,10), dpi=150, facecolor='w', edgecolor='k')
        plt.title(title, size=17)
        plt.xlim(startyear, endyear)

        if has_both:
            yaxtext = 'Percent of births of indicated sex (scale: '
        elif has_male:
            yaxtext = 'Percent of male births (scale: '
        else:
            yaxtext = 'Percent of female births (scale: '

        scale = str(determine_y_limit(max_y)) + ')'
        yaxtext += scale
        plt.ylabel(yaxtext, size=13)
        polys = plt.stackplot(x_values,
                              *[dataframe[(name, sex)] for name, sex in dataframe.columns],
                              colors=colors, baseline=baseline)
        # one rectangle per stacked area, colored like the area, to drive the
        # legend
        legendProxies = []
        for poly in polys:
            legendProxies.append(plt.Rectangle((0, 0), 1, 1, fc=poly.get_facecolor()[0]))
        namelist = []
        for name, sex in dataframe.columns:
            if has_both:
                namelist.append('%s (%s)' % (name, sex))
            else:
                namelist.append(name)
        plt.legend(legendProxies, namelist, loc=3, ncol=2)

        # hide the y ticks and their labels; booleans rather than 'off'
        # strings are used here
        plt.tick_params(
            axis='y',
            which='both',      #  major and minor ticks
            left=False,
            right=False,
            labelleft=False)

    # Save before showing: once the figure has been shown it may already be
    # released, and saving afterwards can write an empty image.
    if png_name != '':
        filename = save_path + "/" + png_name + ".png"
        plt.savefig(filename)
    plt.show()
    plt.close()

# line charts

# one line chart per sex: boys first, then girls
for top_frame, chart_title in ((listed_top_m, top_boys_title),
                               (listed_top_f, top_girls_title)):
    make_chart(
        df=top_frame,
        form='line',      # line , subplots_auto , subplots_same , stream
        title=chart_title,
        colors=[],
        smoothing=0,
        baseline='zero',  # zero ,  sym ,  wiggle ,  weighted_wiggle
    )

# renumber the rows of the filtered names table from zero, then write it out
names_listed.reset_index(drop=True, inplace=True)
names_listed.head()
names_listed.to_csv('lists/names_matching_mythological_list.csv')
This is standard output from download_and_process.py
Data already downloaded.
Data already extracted.
Reading from pickle.
Tail of dataframe 'yob':
           name sex  births  year       pct  ranked
1792086  Zyhier   M       5  2013  0.000267   12995
1792087   Zylar   M       5  2013  0.000267   12995
1792088  Zymari   M       5  2013  0.000267   12995
1792089  Zymeer   M       5  2013  0.000267   12995
1792090   Zyree   M       5  2013  0.000267   12995

Tail of dataframe 'names':
                 name sex  year_count  year_min  year_max   pct_sum   pct_max
102685          Gross   M           1      1925      1925  0.000538  0.000538
102686           Elik   M           1      2012      2012  0.000318  0.000318
102687  Patrickjoseph   M           1      1998      1998  0.000262  0.000262
102688       Southern   M           1      1923      1923  0.000547  0.000547
102689           Jeon   M           1      1999      1999  0.000261  0.000261

Tail of dataframe 'years':
    year  births_f  births_m  births_t  new_names  unique_names_x    sexratio  \
68  2008   1886765   2035811   3922576       2046           32483  107.899553   
69  2009   1832276   1978582   3810858       1789           32210  107.984932   
70  2010   1771846   1912915   3684761       1635           31593  107.961696   
71  2011   1752198   1891800   3643998       1539           31412  107.967250   
72  2012   1751866   1886972   3638838       1531           31212  107.712120   

    unique_names_y_x  unique_names_x  unique_names_y_x  unique_names_x  \
68             32483           32483             32483           32483   
69             32210           32210             32210           32210   
70             31593           31593             31593           31593   
71             31412           31412             31412           31412   
72             31212           31212             31212           31212   

    unique_names_y_x  unique_names_x  unique_names_y_x  unique_names_x  \
68             32483           32483             32483           32483   
69             32210           32210             32210           32210   
70             31593           31593             31593           31593   
71             31412           31412             31412           31412   
72             31212           31212             31212           31212   

    unique_names_y_x  unique_names_x  unique_names_y  
68             32483           32483           32483  
69             32210           32210           32210  
70             31593           31593           31593  
71             31412           31412           31412  
72             31212           31212           31212  
--------------------
First 80 characters of list:
['Athena', 'Amaunet', 'Amen', 'Amon', 'Amun', 'Anat', 'Anqet', 'Antaios', 'Anubi ...
all_listed: list of length 701
--------------------
Dataframe names filtered to those that match list
102690 records to begin.
134 records remaining.
          name sex  year_count  year_min  year_max    pct_sum   pct_max
91       Doris   F         134      1880      2013  41.513522  1.477798
431     Sophia   F         134      1880      2013  18.491332  1.269789
258      Chloe   F         134      1880      2013  10.271245  0.662304
264      Diana   F         134      1880      2013  21.019023  0.480095
294      Flora   F         134      1880      2013  12.321405  0.401130
64555     Seth   M         134      1880      2013   8.829215  0.342210
13211    Khloe   F          25      1989      2013   1.430133  0.304485
9        Delia   F         134      1880      2013   4.716709  0.144228
800       Luna   F         121      1880      2013   1.066362  0.099330
64817  Griffin   M         125      1881      2013   1.782372  0.087336
--------------------
Dataframe yob filtered to those that match list (count only)
1792091 records to begin.
5045 records remaining.
--------------------
Head of total matching list per year, female
      births       pct
year                  
1880     824  0.905564
1881     718  0.780825
1882     910  0.843764
1883     920  0.819074
1884     999  0.774287


Tail of dataframe:
             name sex  births  year       pct   ranked  temp
1778363      Seth   M    1578  2013  0.084319    231.0     1
1779752      Thor   M      94  2013  0.005023   1617.5     1
1781010      Amon   M      39  2013  0.002084   2898.0     1
1781160       Sol   M      37  2013  0.001977   3008.0     1
1789275  Hercules   M       6  2013  0.000321  11332.0     1
Tail of dataframe:
            name sex  births  year       pct  ranked  temp
1759288    Diana   F    1171  2013  0.067429   270.0     1
1759319   Phoebe   F    1050  2013  0.060462   301.0     1
1760337    Delia   F     176  2013  0.010135  1319.5     1
1761290    Doris   F      81  2013  0.004664  2275.0     1
1762435  Minerva   F      46  2013  0.002649  3408.0     1
In [7]:
# every distinct name that survived the filter, in table order
print(names_listed['name'].unique())
['Doris' 'Sophia' 'Chloe' 'Diana' 'Flora' 'Seth' 'Khloe' 'Delia' 'Luna'
 'Griffin' 'Athena' 'Minerva' 'Phoebe' 'Vesta' 'Sol' 'Phoenix' 'Thalia'
 'Isis' 'Rhea' 'Odin' 'Eris' 'Venus' 'Ares' 'Apollo' 'Persephone' 'Gerda'
 'Amon' 'Thor' 'Lucina' 'Osiris' 'Shai' 'Ran' 'Nanna' 'Loki' 'Zeus'
 'Freyja' 'Melaina' 'Gaia' 'Pelagia' 'Sia' 'Hercules' 'Artemis' 'Juno'
 'Lousia' 'Lamia' 'Clio' 'Urania' 'Amen' 'Andromeda' 'Chloris' 'Hera'
 'Ladon' 'Valkyrie' 'Athene' 'Mars' 'Aphrodite' 'Deianeira' 'Helios' 'Min'
 'Clete' 'Areion' 'Nike' 'Cybele' 'Hermes' 'Fortuna' 'Caliope' 'Ourania'
 'Poseidon' 'Janus' 'Mercury' 'Tyr' 'Areia' 'Chimera' 'Ceres' 'Anat'
 'Holle' 'Anubis' 'Vali' 'Ra' 'Pallas' 'Kore' 'Demeter' 'Shu' 'Jupiter'
 'Soteria' 'Makar' 'Amun' 'Maat' 'Lakinia' 'Pater' 'Agathe' 'Saturn'
 'Tyche' 'Khepri' 'Aten' 'Set' 'Fenris' 'Horus' 'Megale']

Refine list

Some of these names are only coincidentally mythological, e.g. Seth is an Egyptian god's name, but also a common Hebrew name, and Doris is a very minor mythological figure, so probably few parents were even aware of the connection (same with Phoebe, Chloe ... Diana is a tougher call, but I think most parents who used the name probably did not choose it because of its mythological association). Obviously, it's impossible to read parents' minds with data abstracted like this, so the only choice is to manually curate the names that most obviously come from mythology.

In [8]:
cutoffn = 0
# how many names will remain to evaluate after duplicates removed
# (0 means: keep and print all of them)

from collections import OrderedDict


def _pct_max_lookup(frame, sex):
    """Map name -> pct_max for the rows of frame with the given sex.

    If a name occurs more than once, the first row wins.
    """
    lookup = {}
    subset = frame[frame.sex == sex]
    for listed_name, pct in zip(subset['name'], subset['pct_max']):
        lookup.setdefault(listed_name, pct)
    return lookup


# Build the two lookups once instead of filtering the dataframe for every name.
pct_max_m = _pct_max_lookup(names_listed, 'M')
pct_max_f = _pct_max_lookup(names_listed, 'F')

evallistm = OrderedDict()
evallistf = OrderedDict()

# remove names with more common duplicates in other sex
# this happens frequently in ssa db
# A name stays on a sex's list when it has no entry for the other sex, or when
# its peak percentage is strictly higher for this sex. A name whose peaks are
# exactly equal for both sexes ends up on neither list.

for name in listed_m:
    if (name not in pct_max_f or name not in pct_max_m or
            pct_max_f[name] < pct_max_m[name]):
        evallistm[name] = ''

for name in listed_f:
    if (name not in pct_max_m or name not in pct_max_f or
            pct_max_m[name] < pct_max_f[name]):
        evallistf[name] = ''

if cutoffn > 0:
    assert len(evallistm) > cutoffn
    assert len(evallistf) > cutoffn
    # an OrderedDict cannot be sliced directly, so slice its items instead
    print(OrderedDict(list(evallistm.items())[:cutoffn]))
    print(OrderedDict(list(evallistf.items())[:cutoffn]))
else:
    print('Length of lists: %d male, %d female\n' % (len(evallistm), len(evallistf)))
    print(evallistm)
    print(' ')
    print(evallistf)
Length of lists: 38 male, 61 female

OrderedDict([('Sol', ''), ('Seth', ''), ('Griffin', ''), ('Amon', ''), ('Thor', ''), ('Hercules', ''), ('Ladon', ''), ('Odin', ''), ('Hermes', ''), ('Apollo', ''), ('Osiris', ''), ('Min', ''), ('Clete', ''), ('Zeus', ''), ('Phoenix', ''), ('Amen', ''), ('Mars', ''), ('Ares', ''), ('Loki', ''), ('Nike', ''), ('Ran', ''), ('Mercury', ''), ('Tyr', ''), ('Jupiter', ''), ('Kore', ''), ('Ra', ''), ('Anubis', ''), ('Helios', ''), ('Poseidon', ''), ('Makar', ''), ('Pater', ''), ('Amun', ''), ('Fenris', ''), ('Set', ''), ('Demeter', ''), ('Horus', ''), ('Megale', ''), ('Aten', '')])
 
OrderedDict([('Delia', ''), ('Minerva', ''), ('Doris', ''), ('Phoebe', ''), ('Chloe', ''), ('Diana', ''), ('Flora', ''), ('Sophia', ''), ('Rhea', ''), ('Venus', ''), ('Vesta', ''), ('Luna', ''), ('Thalia', ''), ('Lucina', ''), ('Athena', ''), ('Gerda', ''), ('Eris', ''), ('Artemis', ''), ('Aphrodite', ''), ('Isis', ''), ('Clio', ''), ('Persephone', ''), ('Melaina', ''), ('Shai', ''), ('Andromeda', ''), ('Lamia', ''), ('Sia', ''), ('Hera', ''), ('Nanna', ''), ('Urania', ''), ('Gaia', ''), ('Khloe', ''), ('Chloris', ''), ('Athene', ''), ('Janus', ''), ('Freyja', ''), ('Valkyrie', ''), ('Ourania', ''), ('Juno', ''), ('Vali', ''), ('Holle', ''), ('Cybele', ''), ('Pelagia', ''), ('Anat', ''), ('Soteria', ''), ('Pallas', ''), ('Fortuna', ''), ('Maat', ''), ('Caliope', ''), ('Chimera', ''), ('Deianeira', ''), ('Agathe', ''), ('Lousia', ''), ('Shu', ''), ('Areion', ''), ('Ceres', ''), ('Areia', ''), ('Saturn', ''), ('Lakinia', ''), ('Tyche', ''), ('Khepri', '')])
In [9]:
#manually copy and paste the above lists and assign 
#'acc' or 'rej' individually to accept or reject

from collections import OrderedDict

evallistm = OrderedDict([('Sol', 'rej'), ('Seth', 'rej'), ('Griffin', 'rej'), ('Amon', 'rej'),
                         ('Thor', 'acc'), ('Hercules', 'acc'), ('Ladon', 'rej'), ('Odin', 'acc'),
                         ('Hermes', 'acc'), ('Apollo', 'acc'), ('Osiris', 'acc'), ('Min', 'rej'),
                         ('Clete', 'rej'), ('Zeus', 'acc'), ('Phoenix', 'acc'), ('Amen', 'rej'),
                         ('Mars', 'acc'), ('Ares', 'acc'), ('Loki', 'acc'), ('Nike', 'rej'),
                         ('Ran', 'rej'), ('Mercury', 'acc'), ('Tyr', 'acc'), ('Jupiter', 'acc'),
                         ('Kore', 'rej'), ('Ra', 'acc'), ('Anubis', 'acc'), ('Helios', 'acc'),
                         ('Poseidon', 'acc'), ('Makar', 'rej'), ('Pater', 'rej'), ('Amun', 'rej'),
                         ('Fenris', 'acc'), ('Set', 'rej'), ('Demeter', 'rej'), ('Horus', 'acc'),
                         ('Megale', 'rej'), ('Aten', 'acc'), ('Saturn', 'acc')])

evallistf = OrderedDict([('Athena', 'acc'), ('Delia', 'rej'), ('Minerva', 'acc'), ('Doris', 'rej'), ('Phoebe', 'rej'),
                         ('Chloe', 'rej'), ('Diana', 'rej'), ('Flora', 'rej'), ('Sophia', 'rej'),
                         ('Rhea', 'rej'), ('Venus', 'acc'), ('Vesta', 'acc'), ('Luna', 'rej'),
                         ('Thalia', 'acc'), ('Lucina', 'rej'), ('Gerda', 'rej'), ('Eris', 'acc'),
                         ('Artemis', 'acc'), ('Aphrodite', 'acc'), ('Isis', 'acc'), ('Clio', 'acc'),
                         ('Persephone', 'acc'), ('Melaina', 'rej'), ('Shai', 'rej'), ('Andromeda', 'acc'),
                         ('Lamia', 'acc'), ('Sia', 'rej'), ('Hera', 'acc'), ('Nanna', 'rej'), ('Urania', 'acc'),
                         ('Gaia', 'acc'), ('Khloe', 'rej'), ('Chloris', 'rej'), ('Athene', 'acc'),
                         ('Janus', 'rej'), ('Freyja', 'acc'), ('Valkyrie', 'acc'), ('Ourania', 'acc'),
                         ('Juno', 'acc'), ('Vali', 'acc'), ('Holle', 'rej'), ('Cybele', 'acc'),
                         ('Pelagia', 'rej'), ('Anat', 'rej'), ('Soteria', 'rej'), ('Pallas', 'acc'),
                         ('Fortuna', 'rej'), ('Maat', 'acc'), ('Caliope', 'acc'), ('Chimera', 'acc'),
                         ('Deianeira', 'rej'), ('Agathe', 'rej'), ('Lousia', 'rej'), ('Shu', 'rej'),
                         ('Areion', 'rej'), ('Ceres', 'acc'), ('Areia', 'rej'), ('Saturn', 'rej'),
                         ('Lakinia', 'rej'), ('Tyche', 'acc'), ('Khepri', 'acc'), ('Demeter', 'acc'),
                         ('Nike', 'acc')])

# Note: Demeter and Nike were taken from the males' list and added to the
# females'; for some reason they spiked higher in males than in females.
# Similarly, Saturn was moved from the females' list to the males'.


def split_by_verdict(evallist):
    """Split an evaluation dict into (accepted names, names without a verdict).

    evallist maps each name to 'acc' or 'rej'. Names marked 'acc' are returned
    first, in dict order; names whose value is neither 'acc' nor 'rej' are
    returned second. Rejected names appear in neither list.
    """
    accepted = []
    unvalidated = []
    for item, verdict in evallist.items():
        if verdict == 'acc':
            accepted.append(item)
        elif verdict != 'rej':
            unvalidated.append(item)
    return accepted, unvalidated


# Test that all names have 'acc' or 'rej' values

final_m, unvalidated_m = split_by_verdict(evallistm)
final_f, unvalidated_f = split_by_verdict(evallistf)
names_not_validated = unvalidated_m + unvalidated_f

final_all = final_m + final_f  # taken before the lists are cut to equal length

if len(names_not_validated) > 0:
    print("The following names do not have 'acc' or 'rej' values:  %s" % (names_not_validated,))
    # a real exception class is raised here, so the message reaches the user
    raise ValueError("Names not validated")

print('Accepted male names: %s' % (final_m,))
print('Accepted female names: %s' % (final_f,))

print('Length: %d male, %d female\n' % (len(final_m), len(final_f)))

# cut both lists down to the length of the shorter one
cutmin = min(len(final_m), len(final_f))

final_m = final_m[:cutmin]
final_f = final_f[:cutmin]

print('After resizing to %d names each:' % (cutmin))
print('Accepted male names: %s' % (final_m,))
print('Accepted female names: %s' % (final_f,))
Accepted male names: ['Thor', 'Hercules', 'Odin', 'Hermes', 'Apollo', 'Osiris', 'Zeus', 'Phoenix', 'Mars', 'Ares', 'Loki', 'Mercury', 'Tyr', 'Jupiter', 'Ra', 'Anubis', 'Helios', 'Poseidon', 'Fenris', 'Horus', 'Aten', 'Saturn']
Accepted female names: ['Athena', 'Minerva', 'Venus', 'Vesta', 'Thalia', 'Eris', 'Artemis', 'Aphrodite', 'Isis', 'Clio', 'Persephone', 'Andromeda', 'Lamia', 'Hera', 'Urania', 'Gaia', 'Athene', 'Freyja', 'Valkyrie', 'Ourania', 'Juno', 'Vali', 'Cybele', 'Pallas', 'Maat', 'Caliope', 'Chimera', 'Ceres', 'Tyche', 'Khepri', 'Demeter', 'Nike']
Length: 22 male, 32 female

After resizing to 22 names each:
Accepted male names: ['Thor', 'Hercules', 'Odin', 'Hermes', 'Apollo', 'Osiris', 'Zeus', 'Phoenix', 'Mars', 'Ares', 'Loki', 'Mercury', 'Tyr', 'Jupiter', 'Ra', 'Anubis', 'Helios', 'Poseidon', 'Fenris', 'Horus', 'Aten', 'Saturn']
Accepted female names: ['Athena', 'Minerva', 'Venus', 'Vesta', 'Thalia', 'Eris', 'Artemis', 'Aphrodite', 'Isis', 'Clio', 'Persephone', 'Andromeda', 'Lamia', 'Hera', 'Urania', 'Gaia', 'Athene', 'Freyja', 'Valkyrie', 'Ourania', 'Juno', 'Vali']

Redo last block because Saturn, Nike and Demeter were assigned to wrong sex

In [10]:
from copy import deepcopy
from collections import OrderedDict

# keep the hand-assigned 'acc' / 'rej' values so they can be carried over
oldm = deepcopy(evallistm)
oldf = deepcopy(evallistf)

cutoffn = 0
# how many names will remain to evaluate after duplicates removed
# (0 means: keep and print all of them)

# manual overrides of the automatic male / female assignment
force_female = ['Demeter', 'Nike']
force_male = ['Saturn']


def _pct_max_lookup(frame, sex):
    """Map name -> pct_max for the rows of frame with the given sex.

    If a name occurs more than once, the first row wins.
    """
    lookup = {}
    subset = frame[frame.sex == sex]
    for listed_name, pct in zip(subset['name'], subset['pct_max']):
        lookup.setdefault(listed_name, pct)
    return lookup


# Build the two lookups once instead of filtering the dataframe for every name.
pct_max_m = _pct_max_lookup(names_listed, 'M')
pct_max_f = _pct_max_lookup(names_listed, 'F')

evallistm = OrderedDict()
evallistf = OrderedDict()

# remove names with more common duplicates in other sex
# this happens frequently in ssa db
# A name stays on a sex's list when it is forced there, when it has no entry
# for the other sex, or when its peak percentage is strictly higher for this
# sex. A name forced to the other sex is always left out.

for name in listed_m:
    if name in force_female:
        continue
    if (name in force_male or
            name not in pct_max_f or name not in pct_max_m or
            pct_max_f[name] < pct_max_m[name]):
        evallistm[name] = ''

for name in listed_f:
    if name in force_male:
        continue
    if (name in force_female or
            name not in pct_max_m or name not in pct_max_f or
            pct_max_m[name] < pct_max_f[name]):
        evallistf[name] = ''

# copy from above block; names without an earlier value keep the empty string
for item in evallistm:
    evallistm[item] = oldm.get(item, '')
for item in evallistf:
    evallistf[item] = oldf.get(item, '')

if cutoffn > 0:
    assert len(evallistm) > cutoffn
    assert len(evallistf) > cutoffn
    # an OrderedDict cannot be sliced directly, so slice its items instead
    print(OrderedDict(list(evallistm.items())[:cutoffn]))
    print(OrderedDict(list(evallistf.items())[:cutoffn]))
else:
    print('Length of lists: %d male, %d female\n' % (len(evallistm), len(evallistf)))
    print(evallistm)
    print(' ')
    print(evallistf)
Length of lists: 36 male, 61 female

OrderedDict([('Sol', 'rej'), ('Seth', 'rej'), ('Griffin', 'rej'), ('Amon', 'rej'), ('Thor', 'acc'), ('Hercules', 'acc'), ('Ladon', 'rej'), ('Odin', 'acc'), ('Hermes', 'acc'), ('Apollo', 'acc'), ('Osiris', 'acc'), ('Min', 'rej'), ('Clete', 'rej'), ('Zeus', 'acc'), ('Phoenix', 'acc'), ('Amen', 'rej'), ('Mars', 'acc'), ('Ares', 'acc'), ('Loki', 'acc'), ('Ran', 'rej'), ('Mercury', 'acc'), ('Tyr', 'acc'), ('Jupiter', 'acc'), ('Kore', 'rej'), ('Ra', 'acc'), ('Anubis', 'acc'), ('Helios', 'acc'), ('Poseidon', 'acc'), ('Makar', 'rej'), ('Pater', 'rej'), ('Amun', 'rej'), ('Fenris', 'acc'), ('Set', 'rej'), ('Horus', 'acc'), ('Megale', 'rej'), ('Aten', 'acc')])
 
OrderedDict([('Delia', 'rej'), ('Minerva', 'acc'), ('Doris', 'rej'), ('Phoebe', 'rej'), ('Chloe', 'rej'), ('Diana', 'rej'), ('Flora', 'rej'), ('Sophia', 'rej'), ('Rhea', 'rej'), ('Venus', 'acc'), ('Vesta', 'acc'), ('Luna', 'rej'), ('Thalia', 'acc'), ('Lucina', 'rej'), ('Athena', 'acc'), ('Gerda', 'rej'), ('Eris', 'acc'), ('Artemis', 'acc'), ('Aphrodite', 'acc'), ('Isis', 'acc'), ('Clio', 'acc'), ('Persephone', 'acc'), ('Melaina', 'rej'), ('Shai', 'rej'), ('Andromeda', 'acc'), ('Lamia', 'acc'), ('Sia', 'rej'), ('Hera', 'acc'), ('Nanna', 'rej'), ('Urania', 'acc'), ('Gaia', 'acc'), ('Khloe', 'rej'), ('Chloris', 'rej'), ('Athene', 'acc'), ('Nike', 'acc'), ('Janus', 'rej'), ('Freyja', 'acc'), ('Valkyrie', 'acc'), ('Ourania', 'acc'), ('Juno', 'acc'), ('Vali', 'acc'), ('Holle', 'rej'), ('Cybele', 'acc'), ('Pelagia', 'rej'), ('Anat', 'rej'), ('Soteria', 'rej'), ('Pallas', 'acc'), ('Fortuna', 'rej'), ('Maat', 'acc'), ('Caliope', 'acc'), ('Chimera', 'acc'), ('Deianeira', 'rej'), ('Agathe', 'rej'), ('Lousia', 'rej'), ('Shu', 'rej'), ('Areion', 'rej'), ('Ceres', 'acc'), ('Areia', 'rej'), ('Lakinia', 'rej'), ('Tyche', 'acc'), ('Khepri', 'acc')])
In [11]:
# Manually copy and paste the lists printed above, then mark each name
# 'acc' (accept) or 'rej' (reject).

# 72, 79 and 80 character rule (PEP)                       do not reach this ->|
#########1#########2#########3#########4#########5#########6#########7#2######9X

evallistm = OrderedDict([
    ('Sol', 'rej'), ('Seth', 'rej'), ('Griffin', 'rej'), ('Amon', 'rej'),
    ('Thor', 'acc'), ('Hercules', 'acc'), ('Ladon', 'rej'), ('Odin', 'acc'),
    ('Hermes', 'acc'), ('Apollo', 'acc'), ('Osiris', 'acc'), ('Min', 'rej'),
    ('Clete', 'rej'), ('Zeus', 'acc'), ('Phoenix', 'acc'), ('Amen', 'rej'),
    ('Mars', 'acc'), ('Ares', 'acc'), ('Loki', 'acc'), ('Ran', 'rej'),
    ('Mercury', 'acc'), ('Tyr', 'acc'), ('Jupiter', 'acc'), ('Kore', 'rej'),
    ('Ra', 'acc'), ('Anubis', 'acc'), ('Helios', 'acc'), ('Poseidon', 'acc'),
    ('Makar', 'rej'), ('Pater', 'rej'), ('Amun', 'rej'), ('Fenris', 'acc'),
    ('Set', 'rej'), ('Horus', 'acc'), ('Megale', 'rej'), ('Aten', 'acc'),
])

# The order of each list matters: later cells slice the accepted names
# from the front to get a "top N".
evallistf = OrderedDict([
    ('Athena', 'acc'), ('Delia', 'rej'), ('Minerva', 'acc'),
    ('Doris', 'rej'), ('Phoebe', 'rej'), ('Chloe', 'rej'),
    ('Diana', 'rej'), ('Flora', 'rej'), ('Sophia', 'rej'),
    ('Rhea', 'rej'), ('Venus', 'acc'), ('Vesta', 'acc'),
    ('Luna', 'rej'), ('Thalia', 'acc'), ('Lucina', 'rej'),
    ('Gerda', 'rej'), ('Eris', 'acc'), ('Artemis', 'acc'),
    ('Aphrodite', 'acc'), ('Isis', 'acc'), ('Clio', 'acc'),
    ('Persephone', 'acc'), ('Melaina', 'rej'), ('Shai', 'rej'),
    ('Andromeda', 'acc'), ('Lamia', 'acc'), ('Sia', 'rej'),
    ('Hera', 'acc'), ('Nanna', 'rej'), ('Urania', 'acc'),
    ('Gaia', 'acc'), ('Khloe', 'rej'), ('Chloris', 'rej'),
    ('Athene', 'acc'), ('Nike', 'acc'), ('Janus', 'rej'),
    ('Freyja', 'acc'), ('Valkyrie', 'acc'), ('Ourania', 'acc'),
    ('Juno', 'acc'), ('Vali', 'acc'), ('Holle', 'rej'),
    ('Cybele', 'acc'), ('Pelagia', 'rej'), ('Anat', 'rej'),
    ('Soteria', 'rej'), ('Pallas', 'acc'), ('Fortuna', 'rej'),
    ('Maat', 'acc'), ('Caliope', 'acc'), ('Chimera', 'acc'),
    ('Deianeira', 'rej'), ('Agathe', 'rej'), ('Lousia', 'rej'),
    ('Shu', 'rej'), ('Areion', 'rej'), ('Ceres', 'acc'),
    ('Areia', 'rej'), ('Lakinia', 'rej'), ('Tyche', 'acc'),
    ('Khepri', 'acc'),
])

# Note, Demeter and Nike taken from males' list and moved to females';
# for some reason it spiked in males higher than in females
# Similarly, Saturn moved from females' to males'

# Test that all names have 'acc' or 'rej' values

final_m = []
final_f = []

names_not_validated = []
for item in evallistm:
    if evallistm[item] not in ['acc', 'rej']:
        names_not_validated.append(item)
    elif evallistm[item] == 'acc':
        final_m.append(item)
for item in evallistf:
    if evallistf[item] not in ['acc', 'rej']:
        names_not_validated.append(item)
    elif evallistf[item] == 'acc':
        final_f.append(item)

if len(names_not_validated) > 0:
    print "The following names do not have 'acc' or 'rej' values: ", names_not_validated
    raise exception("Names not validated")
    
print 'Accepted male names:', final_m
print 'Accepted female names:', final_f

print 'Length: %d male, %d female\n' % (len(final_m), len(final_f))
Accepted male names: ['Thor', 'Hercules', 'Odin', 'Hermes', 'Apollo', 'Osiris', 'Zeus', 'Phoenix', 'Mars', 'Ares', 'Loki', 'Mercury', 'Tyr', 'Jupiter', 'Ra', 'Anubis', 'Helios', 'Poseidon', 'Fenris', 'Horus', 'Aten']
Accepted female names: ['Athena', 'Minerva', 'Venus', 'Vesta', 'Thalia', 'Eris', 'Artemis', 'Aphrodite', 'Isis', 'Clio', 'Persephone', 'Andromeda', 'Lamia', 'Hera', 'Urania', 'Gaia', 'Athene', 'Nike', 'Freyja', 'Valkyrie', 'Ourania', 'Juno', 'Vali', 'Cybele', 'Pallas', 'Maat', 'Caliope', 'Chimera', 'Ceres', 'Tyche', 'Khepri']
Length: 21 male, 31 female

In [12]:
# manually limit to nice round number

nice_round_number = 100 # if too high, there will be no change
final_m = final_m[:nice_round_number]
final_f = final_f[:nice_round_number]

print 'After manually resizing to nice round number of %d names each:' % (nice_round_number)
print 'Accepted male names:', final_m
print 'Accepted female names:', final_f
After manually resizing to nice round number of 100 names each:
Accepted male names: ['Thor', 'Hercules', 'Odin', 'Hermes', 'Apollo', 'Osiris', 'Zeus', 'Phoenix', 'Mars', 'Ares', 'Loki', 'Mercury', 'Tyr', 'Jupiter', 'Ra', 'Anubis', 'Helios', 'Poseidon', 'Fenris', 'Horus', 'Aten']
Accepted female names: ['Athena', 'Minerva', 'Venus', 'Vesta', 'Thalia', 'Eris', 'Artemis', 'Aphrodite', 'Isis', 'Clio', 'Persephone', 'Andromeda', 'Lamia', 'Hera', 'Urania', 'Gaia', 'Athene', 'Nike', 'Freyja', 'Valkyrie', 'Ourania', 'Juno', 'Vali', 'Cybele', 'Pallas', 'Maat', 'Caliope', 'Chimera', 'Ceres', 'Tyche', 'Khepri']
In [13]:
# BTW, here's those missassigned (it appears) genders:
print names[names.name == 'Saturn']
print names[names.name == 'Demeter']
print names[names.name == 'Nike']
         name sex  year_count  year_min  year_max   pct_sum   pct_max
49524  Saturn   F           2      1996      2001  0.000563  0.000285
          name sex  year_count  year_min  year_max  pct_sum  pct_max
95309  Demeter   M           1      1920      1920  0.00047  0.00047
       name sex  year_count  year_min  year_max   pct_sum   pct_max
18008  Nike   F          18      1953      2013  0.006075  0.000514
76992  Nike   M          15      1989      2013  0.005376  0.000951
In [14]:
%run download_and_process.py

# reduce names dataframe to those matching list
# print '--------------------\nDataframe names filtered to those that match list'
# print "%d records to begin." % (len(names))
names_listed = names[((names.name.isin(final_m) & (names.sex == 'M')) |
                      (names.name.isin(final_f) & (names.sex == 'F')) )].copy()
names_listed.sort('pct_max', ascending=False, inplace=True)
# print "%d records remaining." % (len(names_listed))
# listed_in_df = list(names_listed.name)
# print names_listed.head(10)
# listed_m = list(names[(names.sex == 'M') & (names.name.isin(final_m))]['name'])
# listed_f = list(names[(names.sex == 'F') & (names.name.isin(final_f))]['name'])

#reduce yob dataframe to those matching list
print '--------------------\nDataframe yob filtered to those that match list (count only)'
print "%d records to begin." % (len(yob))
yob_listed_m = yob[(yob.name.isin(final_m)) & (yob.sex == 'M')].copy()
yob_listed_m.sort(['year', 'sex', 'name'], ascending=False, inplace=True)
yob_listed_f = yob[(yob.name.isin(final_f)) & (yob.sex == 'F')].copy()
yob_listed_f.sort(['year', 'sex', 'name'], ascending=False, inplace=True)
print "%d records remaining." % (len(yob_listed))

# m and f totals
yob_listed_f_agg = pd.DataFrame(yob_listed_f.groupby('year').sum())[['births', 'pct']]
yob_listed_m_agg = pd.DataFrame(yob_listed_m.groupby('year').sum())[['births', 'pct']]
print '--------------------\nHead of total matching list per year, female'
print yob_listed_f_agg.head()

# print chart of m and f totals
print '\n'

def determine_y_limit(x):
    """Return a nice y-axis limit a little above the maximum value x.

    x is cut down to its two most significant digits and then bumped up
    by one unit of the second digit (e.g. 1234 -> 1300, 150 -> 160).
    x must be positive, because its base-10 logarithm is taken.
    """
    exponent = int(math.floor(math.log10(x)))
    # size of one unit of the second-most-significant digit
    step = 10 ** (exponent - 1)
    return (math.floor(x / step) + 1) * step

# Chart of the yearly totals: percent of births of each sex that received
# one of the accepted names (girls in red, boys in blue).

#data
xf = list(yob_listed_f_agg.index)
xm = list(yob_listed_m_agg.index)
pct_f = list(yob_listed_f_agg.pct)
pct_m = list(yob_listed_m_agg.pct)

plt.figure(figsize=(16,9))
plt.plot(xf, pct_f, color="red")
plt.plot(xm, pct_m, color="blue")

plt.ylim(0, determine_y_limit(max(pct_f + pct_m)))
# last_year is set in the first cell and is the one value to change when
# the Social Security database is updated, so use it instead of 2013.
plt.xlim(1880, last_year)

# NOTE(review): the title says 'Top 10', but the lines show whatever
# final_m / final_f held when yob_listed_m / yob_listed_f were built.
plt.title('Top 10 mythological names, boy=blue, girl=red', fontsize = 20)
plt.xlabel("Year", fontsize = 14)
plt.ylabel("% of total births of that sex", fontsize = 14)

plt.show()
Data already downloaded.
Data already extracted.
Reading from pickle.
Tail of dataframe 'yob':
           name sex  births  year       pct  ranked
1792086  Zyhier   M       5  2013  0.000267   12995
1792087   Zylar   M       5  2013  0.000267   12995
1792088  Zymari   M       5  2013  0.000267   12995
1792089  Zymeer   M       5  2013  0.000267   12995
1792090   Zyree   M       5  2013  0.000267   12995

Tail of dataframe 'names':
                 name sex  year_count  year_min  year_max   pct_sum   pct_max
102685          Gross   M           1      1925      1925  0.000538  0.000538
102686           Elik   M           1      2012      2012  0.000318  0.000318
102687  Patrickjoseph   M           1      1998      1998  0.000262  0.000262
102688       Southern   M           1      1923      1923  0.000547  0.000547
102689           Jeon   M           1      1999      1999  0.000261  0.000261

Tail of dataframe 'years':
    year  births_f  births_m  births_t  new_names  unique_names_x    sexratio  \
68  2008   1886765   2035811   3922576       2046           32483  107.899553   
69  2009   1832276   1978582   3810858       1789           32210  107.984932   
70  2010   1771846   1912915   3684761       1635           31593  107.961696   
71  2011   1752198   1891800   3643998       1539           31412  107.967250   
72  2012   1751866   1886972   3638838       1531           31212  107.712120   

    unique_names_y_x  unique_names_x  unique_names_y_x  unique_names_x  \
68             32483           32483             32483           32483   
69             32210           32210             32210           32210   
70             31593           31593             31593           31593   
71             31412           31412             31412           31412   
72             31212           31212             31212           31212   

    unique_names_y_x  unique_names_x  unique_names_y_x  unique_names_x  \
68             32483           32483             32483           32483   
69             32210           32210             32210           32210   
70             31593           31593             31593           31593   
71             31412           31412             31412           31412   
72             31212           31212             31212           31212   

    unique_names_y_x  unique_names_x  unique_names_y_x  unique_names  
68             32483           32483             32483         32483  
69             32210           32210             32210         32210  
70             31593           31593             31593         31593  
71             31412           31412             31412         31412  
72             31212           31212             31212         31212  
--------------------
Dataframe yob filtered to those that match list (count only)
1792091 records to begin.
5045 records remaining.
--------------------
Head of total matching list per year, female
      births       pct
year                  
1880      83  0.091216
1881      73  0.079388
1882      77  0.071395
1883      71  0.063211
1884     106  0.082157


In [15]:
# all names_listed, so we can see which ones to aggregate
# cutoff of 10 already done

print names_listed[names_listed.sex == 'M'].head(50)
print ''
print names_listed[names_listed.sex == 'F'].head(50)
           name sex  year_count  year_min  year_max   pct_sum   pct_max
71603   Phoenix   M          30      1968      2013  0.467450  0.041866
66819      Odin   M          70      1884      2013  0.181126  0.023885
75344      Ares   M          19      1983      2013  0.059744  0.012183
68771    Apollo   M          45      1965      2013  0.092614  0.010901
65453      Thor   M         103      1904      2013  0.223267  0.005603
69242    Osiris   M          42      1970      2013  0.067574  0.005343
76827      Loki   M          15      1996      2013  0.030797  0.004595
70915      Zeus   M          33      1973      2013  0.043210  0.004542
65996  Hercules   M          92      1908      2013  0.089908  0.003887
73252      Mars   M          24      1923      2013  0.012551  0.001431
82133    Helios   M           8      2000      2013  0.003586  0.001069
68592    Hermes   M          46      1924      2013  0.024318  0.000937
86346  Poseidon   M           4      2010      2013  0.002543  0.000802
78294   Mercury   M          13      1972      2012  0.004748  0.000705
79245       Tyr   M          11      2002      2013  0.005111  0.000695
81705    Anubis   M           8      2002      2012  0.002825  0.000516
79701        Ra   M          11      1972      2013  0.003533  0.000510
79294   Jupiter   M          11      1981      2013  0.003643  0.000455
98214      Aten   M           1      2013      2013  0.000267  0.000267
91950    Fenris   M           2      2011      2012  0.000529  0.000265
95454     Horus   M           1      2011      2011  0.000264  0.000264

             name sex  year_count  year_min  year_max   pct_sum   pct_max
1307       Athena   F         104      1902      2013  1.335460  0.083207
75        Minerva   F         134      1880      2013  2.093317  0.069236
679         Vesta   F         127      1880      2012  1.481232  0.044859
1049       Thalia   F         112      1885      2013  0.696109  0.038711
5300         Isis   F          51      1901      2013  0.578503  0.030404
2831         Eris   F          74      1913      2013  0.105284  0.018735
676         Venus   F         127      1887      2013  0.610904  0.015362
5884   Persephone   F          48      1962      2013  0.077109  0.009616
18907      Freyja   F          17      1994      2013  0.020269  0.004376
11122        Gaia   F          29      1980      2013  0.038319  0.004224
3353      Artemis   F          68      1915      2013  0.045047  0.003800
20819        Juno   F          15      1919      2013  0.019627  0.003481
7054        Lamia   F          42      1968      2013  0.059279  0.002726
5469         Clio   F          50      1894      2013  0.047900  0.002709
10392      Urania   F          31      1891      2002  0.016488  0.002696
6648    Andromeda   F          44      1962      2013  0.034651  0.002303
9510         Hera   F          34      1970      2013  0.019592  0.001785
20011    Valkyrie   F          16      1992      2013  0.010817  0.001484
14786      Athene   F          22      1909      2003  0.011416  0.001440
3992    Aphrodite   F          61      1915      2013  0.036654  0.001409
22754      Cybele   F          13      1963      2010  0.008608  0.000948
30199     Caliope   F           7      1919      2013  0.003439  0.000916
20116     Ourania   F          15      1963      2013  0.006050  0.000819
33277     Chimera   F           6      1980      2001  0.002414  0.000659
42757       Ceres   F           3      2005      2013  0.001351  0.000633
18008        Nike   F          18      1953      2013  0.006075  0.000514
21706        Vali   F          14      1952      1967  0.005284  0.000512
29183      Pallas   F           8      1969      2007  0.003137  0.000508
29550        Maat   F           8      1998      2013  0.002521  0.000403
58689       Tyche   F           1      1999      1999  0.000282  0.000282
62227      Khepri   F           1      2009      2009  0.000273  0.000273
In [16]:
# just take top 10

nice_round_number = 10 # if too high, there will be no change
final_m = final_m[:nice_round_number]
final_f = final_f[:nice_round_number]

print 'After manually resizing to nice round number of %d names each:' % (nice_round_number)
print 'Accepted male names:', final_m
print 'Accepted female names:', final_f
After manually resizing to nice round number of 10 names each:
Accepted male names: ['Thor', 'Hercules', 'Odin', 'Hermes', 'Apollo', 'Osiris', 'Zeus', 'Phoenix', 'Mars', 'Ares']
Accepted female names: ['Athena', 'Minerva', 'Venus', 'Vesta', 'Thalia', 'Eris', 'Artemis', 'Aphrodite', 'Isis', 'Clio']
In [17]:
# Build chart_1: one row per year, one column per (name, sex) series,
# holding that name's percentage of births for that sex.
# NOTE: `names` is rebound to a plain list here; earlier cells use the
# same identifier for the names dataframe loaded by download_and_process.py.
names = final_m[:10]
sexes = ['M'] # can be length 1 or same length as names

yearstart=1880
yearend=2013

start = time.time()
if len(sexes) == 1:
    sexes = sexes * len(names)

# Keep only the rows whose (name, sex) pair was requested. A boolean mask
# replaces the row-by-row loop, which was slow and wrote its marker column
# through chained assignment (not guaranteed by pandas to take effect).
wanted_pairs = set(zip(names, sexes))
df_chart = yob[yob['name'].isin(names)].copy()
pair_wanted = pd.Series(
    [pair in wanted_pairs for pair in zip(df_chart['name'], df_chart['sex'])],
    index=df_chart.index, dtype=bool)
df_chart = df_chart[pair_wanted]


#To keep more than one data set for charts in memory, change name of chart_1

chart_1 = pd.DataFrame(pd.pivot_table(df_chart, values='pct', index = 'year', columns=['name', 'sex']))

# Insert missing years in one step: years with no data for any of the
# names become all-zero rows, and years already present are kept.
all_years = chart_1.index.union(list(range(yearstart, yearend + 1)))
chart_1 = chart_1.reindex(all_years)

chart_1 = chart_1.fillna(0)

chart_1 = chart_1.sort_index()

#a single function to make the four different kinds of charts

def make_chart(df=chart_1, form='line', title='', colors= [], smoothing=0, \
               groupedlist = [], baseline='sym', png_name=''):
    
    dataframe = df.copy()
    
    startyear = min(list(dataframe.index))
    endyear = max(list(dataframe.index))
    yearstr = '%d-%d' % (startyear, endyear)
    
    legend_size = 0.01
    
    has_male = False
    has_female = False
    has_both = False
    max_y = 0
    for name, sex in dataframe.columns:
        max_y = max(max_y, dataframe[(name, sex)].max())
        final_name = name
        if sex == 'M': has_male = True
        if sex == 'F': has_female = True
        if smoothing > 0:
            newvalues = []
            for row in range(len(dataframe)):
                start = max(0, row - smoothing)
                end = min(len(dataframe) - 1, row + smoothing)
                newvalues.append(dataframe[(name, sex)].iloc[start:end].mean())
            for row in range(len(dataframe)):
                dataframe[(name, sex)].iloc[row] = newvalues[row]
    if has_male and has_female:
        y_text = "% of births of indicated sex"
        has_both = True
    elif has_male:
        y_text = "Percent of male births"
    else:
        y_text = "Percent of female births"
    
    num_series = len(dataframe.columns)
    
    if colors == []:
       colors = ["#1f78b4","#ae4ec9","#33a02c","#fb9a99","#e31a1c","#a6cee3",
                 "#fdbf6f","#ff7f00","#cab2d6","#6a3d9a","#ffff99","#b15928"]
        #colors = ['#ff0000', '#b00000', '#870000', '#550000', '#e4e400', '#baba00', '#878700', '#545400', '#00ff00', '#00b000', '#008700', '#005500', '#00ffff', '#00b0b0', '#008787', '#005555', '#b0b0ff', '#8484ff', '#4949ff', '#0000ff', '#ff00ff', '#b000b0', '#870087', '#550055', '#e4e4e4', '#bababa', '#878787', '#545454']
    from random import shuffle
    shuffle(colors)
    num_colors = len(colors)
    
    if num_series > num_colors:
        print "Warning: colors will be repeated."
    
    if title == '':
        if num_series == 1:
            title = "Popularity of baby name %s in U.S., %s" % (final_name, yearstr)
        else:
            title = "Popularity of baby names in U.S., %s" % (yearstr)
    
    x_values = range(startyear, endyear + 1)
    y_zeroes = [0] * (endyear - startyear)
    
    if form == 'line':
        fig, ax = plt.subplots(num=None, figsize=(16, 9), dpi=300, facecolor='w', edgecolor='w')
        counter = 0
        for name, sex in dataframe.columns:
            color = colors[counter % num_colors]
            counter += 1
            if has_both:
                label = "%s (%s)" % (name, sex)
            else:
                label = name
            ax.plot(x_values, dataframe[(name, sex)], label=label, color=color, linewidth = 3)
        ax.set_ylim(0,determine_y_limit(max_y)) 
        ax.set_xlim(1980, endyear)
        ax.set_ylabel(y_text, size = 13)
        box = ax.get_position()
        ax.set_position([box.x0, box.y0 + box.height * legend_size,
                 box.width, box.height * (1 - legend_size)])
        legend_cols = min(5, num_series)
        ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), fancybox=True, shadow=True, ncol=legend_cols)

    if form == 'subplots_auto':
        counter = 0
        fig, axes = plt.subplots(num_series, 1, figsize=(12, 3.5*num_series))
        print 'Maximum alpha: %d percent' % (determine_y_limit(max_y))
        for name, sex in dataframe.columns:
            if sex=='M':
                sex_label = 'male'
            else:
                sex_label = 'female'
            label = "Percent of %s births for %s" % (sex_label, name)
            current_ymax = dataframe[(name, sex)].max()
            tint = 1.0 * current_ymax / determine_y_limit(max_y)
            axes[counter].plot(x_values, dataframe[(name, sex)], color='k')
            axes[counter].set_ylim(0,determine_y_limit(current_ymax))
            axes[counter].set_xlim(startyear, endyear)
            axes[counter].fill_between(x_values, dataframe[(name, sex)], color=colors[0], alpha=tint, interpolate=True)

            axes[counter].set_ylabel(label, size=11)
            plt.subplots_adjust(hspace=0.1)
            counter += 1
            
    if form == 'subplots_same':
        counter = 0
        fig, axes = plt.subplots(num_series, 1, figsize=(12, 3.5*num_series))
        print 'Maximum y axis: %d percent' % (determine_y_limit(max_y))
        for name, sex in dataframe.columns:
            if sex=='M':
                sex_label = 'male'
            else:
                sex_label = 'female'
            label = "Percent of %s births for %s" % (sex_label, name)
            axes[counter].plot(x_values, dataframe[(name, sex)], color='k')
            axes[counter].set_ylim(0,determine_y_limit(max_y))
            axes[counter].set_xlim(startyear, endyear)
            axes[counter].fill_between(x_values, dataframe[(name, sex)], color=colors[1], alpha=1, interpolate=True)
            axes[counter].set_ylabel(label, size=11)
            plt.subplots_adjust(hspace=0.1)
            counter += 1
        
    if form == 'stream':
        plt.figure(num=None, figsize=(20,10), dpi=150, facecolor='w', edgecolor='k')
        plt.title(title, size=17)        
        plt.xlim(startyear, endyear)
        
        if has_both:
            yaxtext = 'Percent of births of indicated sex (scale: '
        elif has_male:
            yaxtext = 'Percent of male births (scale: '
        else:
            yaxtext = 'Percent of female births (scale: '
        
        scale = str(determine_y_limit(max_y)) + ')'
        yaxtext += scale
        plt.ylabel(yaxtext, size=13)
        polys = plt.stackplot(x_values, *[dataframe[(name, sex)] for name, sex in dataframe.columns], 
                                 colors=colors, baseline=baseline)
        legendProxies = []
        for poly in polys:
            legendProxies.append(plt.Rectangle((0, 0), 1, 1, fc=poly.get_facecolor()[0]))
        namelist = []
        for name, sex in dataframe.columns:
            if has_both:
                namelist.append('%s (%s)' % (name, sex))
            else:
                namelist.append(name)
        plt.legend(legendProxies, namelist, loc=3, ncol=2)
        
        plt.tick_params(\
            axis='y',          
            which='both',      #  major and minor ticks 
            left='off',      
            right='off',       
            labelleft='off')
        
    plt.show()   
    if png_name != '':
        filename = save_path + "/" + png_name + ".png"
        plt.savefig(filename)
    plt.close()
    
#stream graph of chart_1 (change `form` to draw one of the other chart types)

make_chart(df=chart_1,
           form='stream', # line , subplots_auto , subplots_same , stream
           title='',
           colors= [],
           smoothing=0,
           baseline='sym',  # zero ,  sym ,  wiggle ,  weighted_wiggle
           png_name = '',  # if '', will not be saved
           )
In [18]:
# Build chart_1: one row per year, one column per (name, sex) series,
# holding that name's percentage of births for that sex.
# NOTE: `names` is rebound to a plain list here; earlier cells use the
# same identifier for the names dataframe loaded by download_and_process.py.
names = final_f[:12]
sexes = ['F'] # can be length 1 or same length as names

yearstart=1880
yearend=2013

start = time.time()
if len(sexes) == 1:
    sexes = sexes * len(names)

# Keep only the rows whose (name, sex) pair was requested. A boolean mask
# replaces the row-by-row loop, which was slow and wrote its marker column
# through chained assignment (not guaranteed by pandas to take effect).
wanted_pairs = set(zip(names, sexes))
df_chart = yob[yob['name'].isin(names)].copy()
pair_wanted = pd.Series(
    [pair in wanted_pairs for pair in zip(df_chart['name'], df_chart['sex'])],
    index=df_chart.index, dtype=bool)
df_chart = df_chart[pair_wanted]


#To keep more than one data set for charts in memory, change name of chart_1

chart_1 = pd.DataFrame(pd.pivot_table(df_chart, values='pct', index = 'year', columns=['name', 'sex']))

# Insert missing years in one step: years with no data for any of the
# names become all-zero rows, and years already present are kept.
all_years = chart_1.index.union(list(range(yearstart, yearend + 1)))
chart_1 = chart_1.reindex(all_years)

chart_1 = chart_1.fillna(0)

chart_1 = chart_1.sort_index()

#a single function to make the four different kinds of charts

def make_chart(df=chart_1, form='line', title='', colors= [], smoothing=0, \
               groupedlist = [], baseline='sym', png_name=''):
    
    dataframe = df.copy()
    
    startyear = min(list(dataframe.index))
    endyear = max(list(dataframe.index))
    yearstr = '%d-%d' % (startyear, endyear)
    
    legend_size = 0.01
    
    has_male = False
    has_female = False
    has_both = False
    max_y = 0
    for name, sex in dataframe.columns:
        max_y = max(max_y, dataframe[(name, sex)].max())
        final_name = name
        if sex == 'M': has_male = True
        if sex == 'F': has_female = True
        if smoothing > 0:
            newvalues = []
            for row in range(len(dataframe)):
                start = max(0, row - smoothing)
                end = min(len(dataframe) - 1, row + smoothing)
                newvalues.append(dataframe[(name, sex)].iloc[start:end].mean())
            for row in range(len(dataframe)):
                dataframe[(name, sex)].iloc[row] = newvalues[row]
    if has_male and has_female:
        y_text = "% of births of indicated sex"
        has_both = True
    elif has_male:
        y_text = "Percent of male births"
    else:
        y_text = "Percent of female births"
    
    num_series = len(dataframe.columns)
    
    if colors == []:
       colors = ["#1f78b4","#ae4ec9","#33a02c","#fb9a99","#e31a1c","#a6cee3",
                 "#fdbf6f","#ff7f00","#cab2d6","#6a3d9a","#ffff99","#b15928"]
        #colors = ['#ff0000', '#b00000', '#870000', '#550000', '#e4e400', '#baba00', '#878700', '#545400', '#00ff00', '#00b000', '#008700', '#005500', '#00ffff', '#00b0b0', '#008787', '#005555', '#b0b0ff', '#8484ff', '#4949ff', '#0000ff', '#ff00ff', '#b000b0', '#870087', '#550055', '#e4e4e4', '#bababa', '#878787', '#545454']
    from random import shuffle
    shuffle(colors)
    num_colors = len(colors)
    
    if num_series > num_colors:
        print "Warning: colors will be repeated."
    
    if title == '':
        if num_series == 1:
            title = "Popularity of baby name %s in U.S., %s" % (final_name, yearstr)
        else:
            title = "Popularity of baby names in U.S., %s" % (yearstr)
    
    x_values = range(startyear, endyear + 1)
    y_zeroes = [0] * (endyear - startyear)
    
    if form == 'line':
        fig, ax = plt.subplots(num=None, figsize=(16, 9), dpi=300, facecolor='w', edgecolor='w')
        counter = 0
        for name, sex in dataframe.columns:
            color = colors[counter % num_colors]
            counter += 1
            if has_both:
                label = "%s (%s)" % (name, sex)
            else:
                label = name
            ax.plot(x_values, dataframe[(name, sex)], label=label, color=color, linewidth = 3)
        ax.set_ylim(0,determine_y_limit(max_y)) 
        ax.set_xlim(1980, endyear)
        ax.set_ylabel(y_text, size = 13)
        box = ax.get_position()
        ax.set_position([box.x0, box.y0 + box.height * legend_size,
                 box.width, box.height * (1 - legend_size)])
        legend_cols = min(5, num_series)
        ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), fancybox=True, shadow=True, ncol=legend_cols)

    if form == 'subplots_auto':
        counter = 0
        fig, axes = plt.subplots(num_series, 1, figsize=(12, 3.5*num_series))
        print 'Maximum alpha: %d percent' % (determine_y_limit(max_y))
        for name, sex in dataframe.columns:
            if sex=='M':
                sex_label = 'male'
            else:
                sex_label = 'female'
            label = "Percent of %s births for %s" % (sex_label, name)
            current_ymax = dataframe[(name, sex)].max()
            tint = 1.0 * current_ymax / determine_y_limit(max_y)
            axes[counter].plot(x_values, dataframe[(name, sex)], color='k')
            axes[counter].set_ylim(0,determine_y_limit(current_ymax))
            axes[counter].set_xlim(startyear, endyear)
            axes[counter].fill_between(x_values, dataframe[(name, sex)], color=colors[0], alpha=tint, interpolate=True)

            axes[counter].set_ylabel(label, size=11)
            plt.subplots_adjust(hspace=0.1)
            counter += 1
            
    if form == 'subplots_same':
        counter = 0
        fig, axes = plt.subplots(num_series, 1, figsize=(12, 3.5*num_series))
        print 'Maximum y axis: %d percent' % (determine_y_limit(max_y))
        for name, sex in dataframe.columns:
            if sex=='M':
                sex_label = 'male'
            else:
                sex_label = 'female'
            label = "Percent of %s births for %s" % (sex_label, name)
            axes[counter].plot(x_values, dataframe[(name, sex)], color='k')
            axes[counter].set_ylim(0,determine_y_limit(max_y))
            axes[counter].set_xlim(startyear, endyear)
            axes[counter].fill_between(x_values, dataframe[(name, sex)], color=colors[1], alpha=1, interpolate=True)
            axes[counter].set_ylabel(label, size=11)
            plt.subplots_adjust(hspace=0.1)
            counter += 1
        
    if form == 'stream':
        plt.figure(num=None, figsize=(20,10), dpi=150, facecolor='w', edgecolor='k')
        plt.title(title, size=17)        
        plt.xlim(startyear, endyear)
        
        if has_both:
            yaxtext = 'Percent of births of indicated sex (scale: '
        elif has_male:
            yaxtext = 'Percent of male births (scale: '
        else:
            yaxtext = 'Percent of female births (scale: '
        
        scale = str(determine_y_limit(max_y)) + ')'
        yaxtext += scale
        plt.ylabel(yaxtext, size=13)
        polys = plt.stackplot(x_values, *[dataframe[(name, sex)] for name, sex in dataframe.columns], 
                                 colors=colors, baseline=baseline)
        legendProxies = []
        for poly in polys:
            legendProxies.append(plt.Rectangle((0, 0), 1, 1, fc=poly.get_facecolor()[0]))
        namelist = []
        for name, sex in dataframe.columns:
            if has_both:
                namelist.append('%s (%s)' % (name, sex))
            else:
                namelist.append(name)
        plt.legend(legendProxies, namelist, loc=3, ncol=2)
        
        plt.tick_params(\
            axis='y',          
            which='both',      #  major and minor ticks 
            left='off',      
            right='off',       
            labelleft='off')
        
    plt.show()   
    if png_name != '':
        filename = save_path + "/" + png_name + ".png"
        plt.savefig(filename)
    plt.close()
    
#stream graph

make_chart(df=chart_1,
           form='stream', # line , subplots_auto , subplots_same , stream
           # NOTE(review): start year assumed to be 1880, the first year of the
           # Social Security dataset -- confirm against chart_1's index.
           title="10 most popular mythological girls' names, 1880-2013",
           colors= [],
           smoothing=0,
           baseline='sym',  # zero ,  sym ,  wiggle ,  weighted_wiggle
           png_name = '',  # if '', will not be saved
           )

Values before WWII are HEAVILY weighted towards the white middle class, so refine these graphs to show 1945-present

In [19]:
# Build chart_1: one row per year, one (name, sex) column per requested series,
# holding the 'pct' value of that series for that year (0 where it is absent).

names = final_f[:10]
sexes = ['F'] # can be length 1 or same length as names

yearstart=1880 # for data, not graph
yearend=2013

xmin = 1940 # first year shown on the x axis; read as a global by make_chart

start = time.time()
df_chart = yob.copy()
if len(sexes) == 1:
    sexes = sexes * len(names)

# Keep only the rows whose (name, sex) pair was requested. The isin() pass
# cheaply discards unrelated names; the pair test then drops rows where the
# name matches but the sex does not. A boolean mask is used instead of
# writing a flag cell by cell, because writes through chained indexing are
# not guaranteed to reach the underlying frame.
wanted_pairs = set(zip(names, sexes))
df_chart = df_chart[df_chart['name'].isin(names)]
pair_mask = np.array([pair in wanted_pairs
                      for pair in zip(df_chart['name'], df_chart['sex'])],
                     dtype=bool)
df_chart = df_chart[pair_mask]


#To keep more than one data set for charts in memory, change name of chart_1

chart_1 = pd.DataFrame(pd.pivot_table(df_chart, values='pct', index = 'year', columns=['name', 'sex']))

# Insert any year in yearstart..yearend that has no row, without dropping
# rows that are already present; the new cells are NaN until fillna().
all_years = pd.Index(range(yearstart, yearend + 1), name=chart_1.index.name)
chart_1 = chart_1.reindex(chart_1.index.union(all_years))

chart_1 = chart_1.fillna(0)

chart_1 = chart_1.sort_index()

#a single function to make the four different kinds of charts

def make_chart(df=chart_1, form='line', title='', colors=None, smoothing=0,
               groupedlist=None, baseline='sym', png_name=''):
    """Draw name-popularity series from a year-indexed frame in one of four layouts.

    The function reads three module-level names: ``xmin`` (left edge of every
    x axis), ``save_path`` (directory for saved images) and
    ``determine_y_limit`` (maps a maximum value to an axis limit).

    Parameters:
        df: DataFrame indexed by year whose columns are (name, sex) pairs,
            with sex 'M' or 'F'. A copy is plotted; the caller's frame is
            left untouched.
        form: 'line' (all series on one axis), 'subplots_auto' (one panel per
            series, each with its own y limit and a fill whose opacity
            reflects the series' size relative to the largest),
            'subplots_same' (one panel per series, common y limit) or
            'stream' (stacked graph). No chart is drawn for other values.
        title: Chart title; if '', one is generated from the series and the
            displayed year span. Only the 'stream' form draws the title.
        colors: Sequence of matplotlib colours; empty or None selects a
            built-in 12-colour palette. A shuffled copy is used, so the
            caller's sequence keeps its order.
        smoothing: Half-width, in rows, of a centred moving average applied
            to every series; 0 disables smoothing.
        groupedlist: Accepted for compatibility; not used.
        baseline: Passed to ``plt.stackplot`` for the 'stream' form.
        png_name: If not '', the figure is saved as
            ``save_path/png_name.png`` before being shown.

    Returns:
        None. The figure is shown and then closed.
    """
    dataframe = df.copy()

    startyear = min(list(dataframe.index))
    endyear = max(list(dataframe.index))
    # The x axis starts at xmin, so label the span that is actually shown
    # rather than the full span of the data.
    yearstr = '%d-%d' % (max(startyear, xmin), endyear)

    legend_size = 0.01

    has_male = False
    has_female = False
    max_y = 0
    for name, sex in dataframe.columns:
        series = dataframe[(name, sex)]
        max_y = max(max_y, series.max())
        final_name = name
        if sex == 'M':
            has_male = True
        if sex == 'F':
            has_female = True
        if smoothing > 0:
            # Centred window [row - smoothing, row + smoothing], clipped at
            # the ends. All averages are computed from the unsmoothed values
            # before the column is replaced.
            n_rows = len(series)
            smoothed = []
            for row in range(n_rows):
                window_start = max(0, row - smoothing)
                window_stop = min(n_rows, row + smoothing + 1)
                smoothed.append(series.iloc[window_start:window_stop].mean())
            dataframe[(name, sex)] = smoothed
    has_both = has_male and has_female
    if has_both:
        y_text = "% of births of indicated sex"
    elif has_male:
        y_text = "Percent of male births"
    else:
        y_text = "Percent of female births"

    num_series = len(dataframe.columns)

    if not colors:
        colors = ["#1f78b4","#ae4ec9","#33a02c","#fb9a99","#e31a1c","#a6cee3",
                  "#fdbf6f","#ff7f00","#cab2d6","#6a3d9a","#ffff99","#b15928"]
    else:
        # Work on a copy so shuffling does not reorder the caller's list.
        colors = list(colors)
    from random import shuffle
    shuffle(colors)
    num_colors = len(colors)

    if num_series > num_colors:
        print("Warning: colors will be repeated.")

    if title == '':
        if num_series == 1:
            title = "Popularity of baby name %s in U.S., %s" % (final_name, yearstr)
        else:
            title = "Popularity of baby names in U.S., %s" % (yearstr)

    # Plot against the index itself so x and y always have the same length.
    x_values = list(dataframe.index)

    if form == 'line':
        fig, ax = plt.subplots(num=None, figsize=(16, 9), dpi=300, facecolor='w', edgecolor='w')
        for counter, (name, sex) in enumerate(dataframe.columns):
            if has_both:
                label = "%s (%s)" % (name, sex)
            else:
                label = name
            ax.plot(x_values, dataframe[(name, sex)], label=label,
                    color=colors[counter % num_colors], linewidth = 3)
        ax.set_ylim(0,determine_y_limit(max_y))
        ax.set_xlim(xmin, endyear)
        ax.set_ylabel(y_text, size = 13)
        # Shrink the axes slightly to leave room for the legend below them.
        box = ax.get_position()
        ax.set_position([box.x0, box.y0 + box.height * legend_size,
                         box.width, box.height * (1 - legend_size)])
        legend_cols = min(5, num_series)
        ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), fancybox=True, shadow=True, ncol=legend_cols)

    elif form in ('subplots_auto', 'subplots_same'):
        own_scale = (form == 'subplots_auto')
        # squeeze=False always yields a 2-D array of axes, so a single series
        # is handled exactly like several.
        fig, axes = plt.subplots(num_series, 1, figsize=(12, 3.5*num_series), squeeze=False)
        shared_limit = determine_y_limit(max_y)
        if own_scale:
            print('Maximum alpha: %d percent' % (shared_limit))
        else:
            print('Maximum y axis: %d percent' % (shared_limit))
        for axis, (name, sex) in zip(axes[:, 0], dataframe.columns):
            series = dataframe[(name, sex)]
            sex_label = 'male' if sex == 'M' else 'female'
            axis.plot(x_values, series, color='k')
            axis.set_xlim(xmin, endyear)
            if own_scale:
                # Each panel gets its own limit; fill opacity encodes how
                # large this series is relative to the shared limit.
                current_ymax = series.max()
                tint = 1.0 * current_ymax / shared_limit
                axis.set_ylim(0, determine_y_limit(current_ymax))
                axis.fill_between(x_values, series, color=colors[0], alpha=tint, interpolate=True)
            else:
                axis.set_ylim(0, shared_limit)
                # Second palette entry, wrapping if only one colour was given.
                axis.fill_between(x_values, series, color=colors[1 % num_colors], alpha=1, interpolate=True)
            axis.set_ylabel("Percent of %s births for %s" % (sex_label, name), size=11)
        plt.subplots_adjust(hspace=0.1)

    elif form == 'stream':
        plt.figure(num=None, figsize=(20,16.67), dpi=150, facecolor='w', edgecolor='k')
        plt.title(title, size=17)
        plt.xlim(xmin, endyear)

        if has_both:
            yaxtext = 'Percent of births of indicated sex (scale: '
        elif has_male:
            yaxtext = 'Percent of male births (scale: '
        else:
            yaxtext = 'Percent of female births (scale: '
        yaxtext += str(determine_y_limit(max_y)) + ')'
        plt.ylabel(yaxtext, size=13)

        polys = plt.stackplot(x_values, *[dataframe[(name, sex)] for name, sex in dataframe.columns],
                              colors=colors, baseline=baseline)
        # The legend is built from proxy rectangles that copy each stacked
        # band's face colour.
        legendProxies = [plt.Rectangle((0, 0), 1, 1, fc=poly.get_facecolor()[0])
                         for poly in polys]
        if has_both:
            namelist = ['%s (%s)' % (name, sex) for name, sex in dataframe.columns]
        else:
            namelist = [name for name, sex in dataframe.columns]
        plt.legend(legendProxies, namelist, loc=3, ncol=2)

        # Hide the y ticks and their labels; the scale is stated in the axis
        # label instead. Booleans are used because newer matplotlib releases
        # no longer accept the 'off' strings.
        plt.tick_params(
            axis='y',
            which='both',      #  major and minor ticks
            left=False,
            right=False,
            labelleft=False)

    # Save before showing: once the figure has been shown it may already be
    # closed, which leaves nothing to write to the file.
    if png_name != '':
        filename = save_path + "/" + png_name + ".png"
        plt.savefig(filename)
    plt.show()
    plt.close()
    
#stream graph

# form:     line , subplots_auto , subplots_same , stream
# baseline: zero ,  sym ,  wiggle ,  weighted_wiggle
# png_name: if '', will not be saved
make_chart(df=chart_1, form='stream', title='', colors=[], smoothing=0,
           baseline='sym', png_name='')