In [3]:
%matplotlib inline 
import matplotlib.pyplot as plt 
import pandas 
import numpy as np

import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std

import plotly.plotly as py
import plotly.graph_objs as go
from plotly.tools import FigureFactory as FF
py.sign_in('erikrood','3eqsrype8v')
In [4]:
all_ipos = pandas.read_csv('/Users/erikrood/desktop/ipython_datasets/Tech_IPOs.csv')
-------------------------------------------------------------------
OSError                           Traceback (most recent call last)
<ipython-input-4-428b7d88cdaf> in <module>()
----> 1 all_ipos = pandas.read_csv('/Users/erikrood/desktop/ipython_datasets/Tech_IPOs.csv')

/Users/erikrood/anaconda3/lib/python3.5/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
    560                     skip_blank_lines=skip_blank_lines)
    561 
--> 562         return _read(filepath_or_buffer, kwds)
    563 
    564     parser_f.__name__ = name

/Users/erikrood/anaconda3/lib/python3.5/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
    313 
    314     # Create the parser.
--> 315     parser = TextFileReader(filepath_or_buffer, **kwds)
    316 
    317     if (nrows is not None) and (chunksize is not None):

/Users/erikrood/anaconda3/lib/python3.5/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
    643             self.options['has_index_names'] = kwds['has_index_names']
    644 
--> 645         self._make_engine(self.engine)
    646 
    647     def close(self):

/Users/erikrood/anaconda3/lib/python3.5/site-packages/pandas/io/parsers.py in _make_engine(self, engine)
    797     def _make_engine(self, engine='c'):
    798         if engine == 'c':
--> 799             self._engine = CParserWrapper(self.f, **self.options)
    800         else:
    801             if engine == 'python':

/Users/erikrood/anaconda3/lib/python3.5/site-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
   1211         kwds['allow_leading_cols'] = self.index_col is not False
   1212 
-> 1213         self._reader = _parser.TextReader(src, **kwds)
   1214 
   1215         # XXX

pandas/parser.pyx in pandas.parser.TextReader.__cinit__ (pandas/parser.c:3427)()

pandas/parser.pyx in pandas.parser.TextReader._setup_parser_source (pandas/parser.c:6861)()

OSError: File b'/Users/erikrood/desktop/ipython_datasets/Tech_IPOs.csv' does not exist
In [ ]:
all_ipos.head()
In [ ]:
#dropping rows with no listed IPO year
all_ipos = all_ipos[all_ipos.IPOyear != 'n/a']
all_ipos = all_ipos[all_ipos.Sector != 'n/a']
In [ ]:
tech_ipos = all_ipos[all_ipos.Sector == 'Technology']
In [ ]:
total_ipos_sector = all_ipos.groupby('IPOyear').count().sort_values(by='Sector',ascending=False).reset_index()
total_ipos_sector = total_ipos_sector[['IPOyear','Sector']]
total_ipos_sector.head()

IPOs by sector year (n-count, use to create multi-series line plot)

In [ ]:
all_ipos_clean.info()
In [ ]:
#setting up initial pivot view, I want my sector variables as columns though
all_ipos_clean = all_ipos[['Sector','IPOyear','Industry']]
df_pivot = all_ipos_clean.groupby(['Sector','IPOyear']).count().reset_index()
df_pivot.columns = ['Sector','IPOyear','Total_IPOs']
df_pivot.head(5)
In [ ]:
#pivoting to shift sectors to columns
df_pivot2 = df_pivot.pivot('IPOyear','Sector','Total_IPOs').reset_index()
In [ ]:
#replacing null counts with 0
df_pivot2.replace(np.NaN, 0, inplace=True)

#stripping whitespace in columns
#used to replace spaces w/ "_" (strip whitespace)
df_pivot2.columns = [x.strip().replace(' ', '_') for x in df_pivot2.columns]
df_pivot2.columns = [x.strip().replace('-', '_') for x in df_pivot2.columns]
df_pivot2.head()
In [ ]:
df_pivot2['Total'] = df_pivot2.Basic_Industries +df_pivot2.Capital_Goods+df_pivot2.Consumer_Durables+df_pivot2.Consumer_Non_Durables\
+df_pivot2.Consumer_Services+df_pivot2.Energy+df_pivot2.Finance+df_pivot2.Health_Care+df_pivot2.Miscellaneous\
+df_pivot2.Public_Utilities+df_pivot2.Technology + df_pivot2.Transportation

df_pivot2['Tech_perc_total'] = (df_pivot2.Technology/df_pivot2.Total)*100
df_pivot2.head()
In [ ]:
df_combchart = df_pivot2[['IPOyear','Technology','Tech_perc_total']]
df_combchart = df_combchart[(df_combchart['IPOyear'] >= '1988')].reset_index()

df_combchart.head()
In [ ]:
#bar with # of registrations, line with % of total

#bar chart

x = df_combchart['IPOyear']
y1 = df_combchart['Technology']
y2 = df_combchart['Tech_perc_total']


trace0 = go.Bar(
        x=x,
        y=y1,
        name='Technology Sector IPOs',
        marker=dict(
            color='rgb(178, 227, 250)',
            line=dict(
                color='rgb(8,48,107)',
                width=1.5
            ),
        ),
        opacity=0.6
    )

trace1 = go.Scatter(
        x=x,
        y=y2,
        name='percent of total IPOs',
        yaxis = 'y2',
        marker=dict(
            color='rgb(84, 226, 129)',
            line=dict(
                color='rgb(8,48,107)',
                width=1.5
            ),
        ),
        opacity=0.6
    )
        
data = [trace0, trace1]
    
layout = go.Layout(
      title='Technology Sector IPOs',
    legend=dict(
        x=.5,
        y=1,
        bgcolor='#E2E2E2',
        bordercolor='#FFFFFF',
        borderwidth=2,
        ),
    xaxis=dict(
        title='category',
        tickangle = 47,
    ),
    
    yaxis=dict(
        title='# of Technology Sector IPOs'
    ),
    
    yaxis2=dict(
        title='percent of total IPOs',
        overlaying='y',
        side='right'
    ),
    
     margin=go.Margin(
        b = 150,
        r=50,
        t = 50
    )

    
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='tech_IPOS')
In [ ]:
import datetime
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go


year = df_pivot['IPOyear']
y = df_pivot['Sector']
z = df_pivot['Total_IPOs']

data = [
    go.Heatmap(
        z=z,
        x=year,
        y=y,
        colorscale=[[0, 'rgb(216, 236, 247)'], [0.5, 'rgb(49,54,149)'], [1.0, 'rgb(109, 217, 109)']]
    ),
    
]

layout = go.Layout(
    title='IPOs per year by Sector',
    xaxis = dict(ticks='', nticks=20),
    yaxis = dict(ticks='' )
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='datetime-heatmap')
In [ ]:
df_pivot_above2000 = df_pivot[(df_pivot['IPOyear'] >= '2000')].reset_index()
In [ ]:
import datetime
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go


year = df_pivot_above2000['IPOyear']
y = df_pivot_above2000['Sector']
z = df_pivot_above2000['Total_IPOs']

data = [
    go.Heatmap(
        z=z,
        x=year,
        y=y,
        colorscale=[[0, 'rgb(216, 236, 247)'], [0.5, 'rgb(49,54,149)'], [1.0, 'rgb(109, 217, 109)']]
    ),
    
]

layout = go.Layout(
    title='IPOs per year by Sector',
    xaxis = dict(ticks='', nticks=20),
    yaxis = dict(ticks='' )
)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='datetime-heatmap2')

multi-series line chart - did not use

In [ ]:
# Create and style traces
x = df_pivot2['IPOyear']
y1 = df_pivot2['Basic_Industries']
y2 = df_pivot2['Capital_Goods']
y3 = df_pivot2['Consumer_Durables']
y4 = df_pivot2['Consumer_Non_Durables']
y5 = df_pivot2['Consumer_Services']
y6 = df_pivot2['Energy']
y7 = df_pivot2['Finance']
y8 = df_pivot2['Health_Care']
y9 = df_pivot2['Public_Utilities']
y10 = df_pivot2['Technology']
y11 = df_pivot2['Transportation']


trace0 = go.Scatter(
    x = x,
    y = y1,
    name = 'Basic_Industries',
    line = dict(
        color = ('rgb(205, 12, 24)'),
        width = 4)
)
trace1 = go.Scatter(
    x = x,
    y = y2,
    name = 'Capital_Goods',
    line = dict(
        color = ('rgb(22, 96, 167)'),
        width = 4,)
)
trace2 = go.Scatter(
    x = x,
    y = y3,
    name = 'Consumer_Durables',
    line = dict(
        color = ('rgb(205, 12, 24)'),
        width = 4,
        dash = 'dash') # dash options include 'dash', 'dot', and 'dashdot'
)
trace3 = go.Scatter(
    x = x,
    y = y4,
    name = 'Consumer_Non-Durables',
    line = dict(
        color = ('rgb(22, 96, 167)'),
        width = 4,
        dash = 'dash')
)
trace4 = go.Scatter(
    x = x,
    y = y5,
    name = 'Consumer_Services',
    line = dict(
        color = ('rgb(205, 12, 24)'),
        width = 4,
        dash = 'dot')
)
trace5 = go.Scatter(
    x = x,
    y = y6,
    name = 'Energy',
    line = dict(
        color = ('rgb(22, 96, 167)'),
        width = 4,
        dash = 'dot')
)
data = [trace0, trace1, trace2, trace3, trace4, trace5]

# Edit the layout
layout = dict(title = '# of IPOs by sector',
              xaxis = dict(title = '# IPOs'),
              yaxis = dict(title = 'Year'),
              )

# Plot and embed in ipython notebook!
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='styled-line')

Tech IPOs by year (n-count)

In [ ]:
tech_ipos_pivot = tech_ipos[['IPOyear','Sector']]
tech_ipos_pivot = tech_ipos_pivot.groupby(['IPOyear']).count().reset_index()
tech_ipos_pivot.head()
In [ ]: