!pip install --upgrade pandas-datareader !pip install --upgrade yfinance %matplotlib inline import pandas as pd import numpy as np import matplotlib.pyplot as plt plt.rcParams["figure.figsize"] = [10,8] # Set default figure size import requests s = pd.Series(np.random.randn(4), name='daily returns') s s * 100 np.abs(s) s.describe() s.index = ['AMZN', 'AAPL', 'MSFT', 'GOOG'] s s['AMZN'] s['AMZN'] = 0 s 'AAPL' in s df = pd.read_csv('https://raw.githubusercontent.com/QuantEcon/lecture-python-programming/master/source/_static/lecture_specific/pandas/data/test_pwt.csv') type(df) df df[2:5] df[['country', 'tcgdp']] df.iloc[2:5, 0:4] df.loc[df.index[2:5], ['country', 'tcgdp']] df[df.POP >= 20000] df.POP >= 20000 df[(df.country.isin(['Argentina', 'India', 'South Africa'])) & (df.POP > 40000)] # the above is equivalent to df.query("POP >= 20000") df.query("country in ['Argentina', 'India', 'South Africa'] and POP > 40000") df[(df.cc + df.cg >= 80) & (df.POP <= 20000)] # the above is equivalent to df.query("cc + cg >= 80 & POP <= 20000") df.loc[df.cc == max(df.cc)] df.loc[(df.cc + df.cg >= 80) & (df.POP <= 20000), ['country', 'year', 'POP']] df_subset = df[['country', 'POP', 'tcgdp']] df_subset df[['year', 'POP', 'XRAT', 'tcgdp', 'cc', 'cg']].apply(max) df.apply(lambda row: row, axis=1) complexCondition = df.apply( lambda row: row.POP > 40000 if row.country in ['Argentina', 'India', 'South Africa'] else row.POP < 20000, axis=1), ['country', 'year', 'POP', 'XRAT', 'tcgdp'] complexCondition df.loc[complexCondition] df.where(df.POP >= 20000, False) df.loc[df.cg == max(df.cg), 'cg'] = np.nan df def update_row(row): # modify POP row.POP = np.nan if row.POP<= 10000 else row.POP # modify XRAT row.XRAT = row.XRAT / 10 return row df.apply(update_row, axis=1) # Round all decimal numbers to 2 decimal places df.applymap(lambda x : round(x,2) if type(x)!=str else x) for idx in list(zip([0, 3, 5, 6], [3, 4, 6, 2])): df.iloc[idx] = np.nan df # replace all NaN values by 0 def replace_nan(x): if type(x)!=str: return 0 if np.isnan(x) else x else: return x df.applymap(replace_nan) df = df.fillna(df.iloc[:,2:8].mean()) df df = df[['country', 'POP', 'tcgdp']] df df = df.set_index('country') df df.columns = 'population', 'total GDP' df df['population'] = df['population'] * 1e3 df df['GDP percap'] = df['total GDP'] * 1e6 / df['population'] df ax = df['GDP percap'].plot(kind='bar') ax.set_xlabel('country', fontsize=12) ax.set_ylabel('GDP per capita', fontsize=12) plt.show() df = df.sort_values(by='GDP percap', ascending=False) df ax = df['GDP percap'].plot(kind='bar') ax.set_xlabel('country', fontsize=12) ax.set_ylabel('GDP per capita', fontsize=12) plt.show() r = requests.get('http://research.stlouisfed.org/fred2/series/UNRATE/downloaddata/UNRATE.csv') url = 'http://research.stlouisfed.org/fred2/series/UNRATE/downloaddata/UNRATE.csv' source = requests.get(url).content.decode().split("\n") source[0] source[1] source[2] data = pd.read_csv(url, index_col=0, parse_dates=True) type(data) data.head() # A useful method to get a quick look at a data frame pd.set_option('display.precision', 1) data.describe() # Your output might differ slightly ax = data['2006':'2012'].plot(title='US Unemployment Rate', legend=False) ax.set_xlabel('year', fontsize=12) ax.set_ylabel('%', fontsize=12) plt.show() from pandas_datareader import wb govt_debt = wb.download(indicator='GC.DOD.TOTL.GD.ZS', country=['US', 'AU'], start=2005, end=2016).stack().unstack(0) ind = govt_debt.index.droplevel(-1) govt_debt.index = ind ax = govt_debt.plot(lw=2) ax.set_xlabel('year', fontsize=12) plt.title("Government Debt to GDP (%)") plt.show() import datetime as dt import yfinance as yf ticker_list = {'INTC': 'Intel', 'MSFT': 'Microsoft', 'IBM': 'IBM', 'BHP': 'BHP', 'TM': 'Toyota', 'AAPL': 'Apple', 'AMZN': 'Amazon', 'C': 'Citigroup', 'QCOM': 'Qualcomm', 'KO': 'Coca-Cola', 'GOOG': 'Google'} def read_data(ticker_list, start=dt.datetime(2021, 1, 1), end=dt.datetime(2021, 12, 31)): """ This function reads in closing price data from Yahoo for each tick in the ticker_list. """ ticker = pd.DataFrame() for tick in ticker_list: stock = yf.Ticker(tick) prices = stock.history(start=start, end=end) # Change the index to date-only prices.index = pd.to_datetime(prices.index.date) closing_prices = prices['Close'] ticker[tick] = closing_prices return ticker ticker = read_data(ticker_list) p1 = ticker.iloc[0] #Get the first set of prices as a Series p2 = ticker.iloc[-1] #Get the last set of prices as a Series price_change = (p2 - p1) / p1 * 100 price_change change = ticker.pct_change(periods=len(ticker)-1, axis='rows')*100 price_change = change.iloc[-1] price_change price_change.sort_values(inplace=True) price_change = price_change.rename(index=ticker_list) fig, ax = plt.subplots(figsize=(10,8)) ax.set_xlabel('stock', fontsize=12) ax.set_ylabel('percentage change in price', fontsize=12) price_change.plot(kind='bar', ax=ax) plt.show() indices_list = {'^GSPC': 'S&P 500', '^IXIC': 'NASDAQ', '^DJI': 'Dow Jones', '^N225': 'Nikkei'} indices_data = read_data( indices_list, start=dt.datetime(1971, 1, 1), #Common Start Date end=dt.datetime(2021, 12, 31) ) yearly_returns = pd.DataFrame() for index, name in indices_list.items(): p1 = indices_data.groupby(indices_data.index.year)[index].first() # Get the first set of returns as a DataFrame p2 = indices_data.groupby(indices_data.index.year)[index].last() # Get the last set of returns as a DataFrame returns = (p2 - p1) / p1 yearly_returns[name] = returns yearly_returns yearly_returns.describe() fig, axes = plt.subplots(2, 2, figsize=(10, 8)) for iter_, ax in enumerate(axes.flatten()): # Flatten 2-D array to 1-D array index_name = yearly_returns.columns[iter_] # Get index name per iteration ax.plot(yearly_returns[index_name]) # Plot pct change of yearly returns per index ax.set_ylabel("percent change", fontsize = 12) ax.set_title(index_name) plt.tight_layout()