"""Exploratory analysis of the Pokemon challenge data set.

Reads ``pokemon-challenge/pokemon.csv`` (indexed by the ``#`` column) and
``pokemon-challenge/combats.csv``, then draws a series of matplotlib/seaborn
figures: species counts and mean stats per main type, legendary proportions,
stat trends per generation, stat scatter/regression plots, and per-species
winning percentages derived from the combats table.

The script is a notebook export: bare expressions such as ``pokemon.head()``
only display something when run cell-by-cell in IPython/Jupyter.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# ``%matplotlib inline`` is IPython-only syntax and a SyntaxError in a plain
# ``.py`` file.  Call the equivalent API when IPython is present and skip it
# otherwise.
try:
    get_ipython().run_line_magic("matplotlib", "inline")  # noqa: F821
except NameError:
    pass

sns.set_style("whitegrid")

# --------------------------------------------------------------------------
# Load data
# --------------------------------------------------------------------------
pokemon = pd.read_csv('pokemon-challenge/pokemon.csv', index_col="#")
combats = pd.read_csv('pokemon-challenge/combats.csv')

pokemon.head()
pokemon.describe()
pokemon.info()

combats.head()
combats.describe()
combats.info()

# One colour per main type, listed in alphabetical order of the type name.
# Several frames below assign this list positionally, so they must be sorted
# by type name at the moment of assignment.
pk_colors = [
    '#A8B820',  # Bug
    '#705848',  # Dark
    '#7038F8',  # Dragon
    '#F8D030',  # Electric
    '#EE99AC',  # Fairy
    '#C03028',  # Fighting
    '#F08030',  # Fire
    '#A890F0',  # Flying
    '#705898',  # Ghost
    '#78C850',  # Grass
    '#E0C068',  # Ground
    '#98D8D8',  # Ice
    '#A8A878',  # Normal
    '#A040A0',  # Poison
    '#F85888',  # Psychic
    '#B8A038',  # Rock
    '#B8B8D0',  # Steel
    '#6890F0',  # Water
]

# --------------------------------------------------------------------------
# Number of species per main type
# --------------------------------------------------------------------------
# Name the counts explicitly: newer pandas names the value_counts() result
# "count" instead of the source column, which would break the "Type 1"
# lookups below.
pkt_cnt = (
    pokemon["Type 1"]
    .value_counts(sort=False)
    .sort_index()
    .rename("Type 1")
)
pkt_cnt = pd.concat(
    [pkt_cnt, pd.DataFrame(pk_colors, index=pkt_cnt.index, columns=["Colors"])],
    axis=1,
)
pkt_cnt = pkt_cnt.sort_values("Type 1")

pkt_cnt_bar = pkt_cnt.plot(kind='barh', y="Type 1", color=pkt_cnt.Colors,
                           legend=False, figsize=(8, 8))
pkt_cnt_bar.set_title("Number of known species\nfor each Pokemon main type",
                      fontsize=16, weight="bold")
pkt_cnt_bar.set_xlabel("Number of species")

# --------------------------------------------------------------------------
# Mean stats per main type
# --------------------------------------------------------------------------
pkt = pokemon.pivot_table(
    index="Type 1",
    values=["Attack", "Defense", "HP", "Sp. Atk", "Sp. Def", "Speed"],
    aggfunc='mean',
)
pkt.head()

pkt["Total_Power"] = pkt.iloc[:, range(6)].sum(axis=1)
pkt["Color"] = pk_colors  # pivot index is sorted by type name
pkt.head()

pkt = pkt.sort_values("Total_Power", ascending=True)
pkt_bar = pkt.plot(kind="barh", y="Total_Power", figsize=(8, 8),
                   color=pkt.Color, legend=False)
pkt_bar.set_xlim((350, 575))
pkt_bar.set_xlabel("Mean Total Power")
pkt_bar.set_title("Mean Total Power for each Pokemon Main Type",
                  fontsize=16, weight="bold")

# --------------------------------------------------------------------------
# Legendary Pokemon per main type
# --------------------------------------------------------------------------
pkl = pokemon.pivot_table(index="Type 1", values=["Legendary"], aggfunc="sum")
# Keep the sorted result (the original call discarded it); the positional
# colour assignment below depends on alphabetical order.
pkl = pkl.sort_index()
pkl.Legendary = pkl.Legendary.astype(int)
pkl["Total"] = pokemon["Type 1"].value_counts()
pkl["Other"] = pkl.Total - pkl.Legendary
pkl["Ratio"] = pokemon.pivot_table(index="Type 1", values=["Legendary"],
                                   aggfunc="mean")
pkl["Percent"] = pkl["Ratio"] * 100
pkl["Color"] = pk_colors
pkl.head()

ax = pokemon.Legendary.value_counts().plot.pie(startangle=45,
                                               autopct='%.1f %%',
                                               figsize=(8, 6))
ax.set_title("Proportion of Legendary Pokemons", fontsize=16, weight="bold")

pkl_notnull = pkl[pkl.Legendary != 0]
pkl_notnull.head()

l_total = pkl_notnull.Legendary.sum()
ax = pkl_notnull.Legendary.plot(kind="pie", label="",
                                colors=pkl_notnull.Color,
                                autopct='%.1f%%', pctdistance=0.8,
                                explode=pkl_notnull.Legendary / l_total,
                                figsize=(8, 6))
ax.set_title("Legendary Proportions by Pokemon Type",
             fontsize=16, weight="bold")

# One small pie per type on a 6 x 3 grid.
cols = ["Legendary", "Other"]
fig, axes = plt.subplots(6, 3, figsize=(10, 10))
for i, idx in enumerate(pkl.index):
    ax = axes[i // 3, i % 3]
    ax.pie(pkl[cols].T[idx], labels=cols, startangle=30, autopct='%.1f %%')
    ax.set_title(idx, fontdict={"size": "large", "weight": "bold"})
fig.subplots_adjust(wspace=.5, hspace=.5)
plt.suptitle("Proportion of Legendary for each Pokemon Type",
             fontsize=16, weight="bold")

# --------------------------------------------------------------------------
# Mean stats per generation
# --------------------------------------------------------------------------
pkg = pokemon.pivot_table(
    index="Generation",
    values=["Attack", "Defense", "HP", "Sp. Atk", "Sp. Def", "Speed"],
    aggfunc='mean',
)
pkg.head()

pkg_long = pkg.reset_index().melt(id_vars="Generation")
pkg_long.head(10)
pkg_long.info()

# ``height`` is the current name of FacetGrid's former ``size`` parameter.
g = sns.FacetGrid(pkg_long, row="variable", height=1.5, aspect=6)
g = g.map(plt.plot, "Generation", "value", linestyle='dashed', marker='o')
g.fig.suptitle("Mean Pokemon Stats along Generations",
               fontsize=16, weight="bold", y=1.05)

pkg["Total_Power"] = pkg.sum(axis=1)
ax1 = pkg.Total_Power.plot(kind="line", linestyle="dashed", marker="o",
                           figsize=(10, 4))
pkg["Legendary"] = pokemon.groupby("Generation")[["Legendary"]].sum()
ax2 = pkg.Legendary.plot(kind="line", linestyle="dashed", marker="o",
                         secondary_y=True, ax=ax1)
ax1.set_ylabel("Total Power")
ax2.set_ylabel("# of Legendary")
ax1.legend(loc="upper left")
ax2.legend(loc="upper right")
ax1.set_title("Mean Pokemon Stats along Generations",
              fontsize=16, weight="bold", y=1.05)

# --------------------------------------------------------------------------
# Attack vs Defense
# --------------------------------------------------------------------------
ax = pokemon.plot(kind='scatter', x='Attack', y='Defense', alpha=0.33,
                  color='red', figsize=(8, 8))
ax.set_xlabel('Attack')
ax.set_ylabel('Defense')
ax.set_title('Attack vs Defense for every known Pokemon species',
             fontsize=16, weight="bold")

pokemon[(pokemon.Attack < 25) & (pokemon.Defense > 200)]
pokemon[(pokemon.Attack > 175) & (pokemon.Defense < 50)]

# Same scatter again, this time with the two outliers annotated.
ax = pokemon.plot(kind='scatter', x='Attack', y='Defense', alpha=0.33,
                  color='red', figsize=(8, 8))
ax.set_xlabel('Attack')
ax.set_ylabel('Defense')
ax.set_title('Attack vs Defense for every known Pokemon species',
             fontsize=16, weight="bold")

# ``xy`` must be a pair of scalars, so take the first matching row's values
# (the original passed whole Series).
pk1 = pokemon[(pokemon.Attack < 25) & (pokemon.Defense > 200)]
ax.annotate(pk1.Name.iloc[0],
            xy=(pk1.Attack.iloc[0], pk1.Defense.iloc[0]), xycoords='data',
            xytext=(30, -10), textcoords='offset points',
            arrowprops=dict(arrowstyle="->", lw=2))

pk2 = pokemon[(pokemon.Attack > 175) & (pokemon.Defense < 50)]
ax.annotate("\n".join(pk2.Name.iloc[0].split()),
            xy=(pk2.Attack.iloc[0], pk2.Defense.iloc[0]), xycoords='data',
            xytext=(-100, 0), textcoords='offset points',
            arrowprops=dict(arrowstyle="->", lw=2))

# --------------------------------------------------------------------------
# Attack / Defense vs Speed
# --------------------------------------------------------------------------
fig, ax = plt.subplots()
fig.set_size_inches(8, 8)
sns.regplot(x="Attack", y="Speed", data=pokemon, ax=ax,
            scatter_kws={'color': 'green', 'alpha': 0.3},
            line_kws={'color': 'red'})
ax.set_title('Attack vs Speed for every known Pokemon species\n+ Regression Line',
             fontsize=16, weight="bold")

g = sns.jointplot(x="Defense", y="Speed", data=pokemon, kind="reg",
                  line_kws={'color': 'green'}, scatter_kws={'alpha': 0.33})
g.fig.set_size_inches(8, 8)
g.fig.suptitle("Defense vs Speed joinplot\nfor every known Pokemon species",
               fontsize=16, weight="bold", y=1.05)

g = sns.jointplot(x="Defense", y="Speed", data=pokemon, kind="hex")
g.fig.set_size_inches(8, 8)
g.fig.suptitle("Defense vs Speed hexagon joinplot\nfor every known Pokemon species",
               fontsize=16, weight="bold", y=1.05)

# --------------------------------------------------------------------------
# Total power per species
# --------------------------------------------------------------------------
# Columns 3..8 are summed as the six battle stats; this relies on the CSV
# column order.
pokemon["Total_Power"] = pokemon.iloc[:, range(3, 9)].sum(axis=1)
pokemon.head()

ax = pokemon.boxplot(column='Total_Power', by='Legendary', figsize=(10, 5))
ax.set_ylabel("Total Power")
ax.get_figure().gca().set_title("")
plt.suptitle('Legendary vs non Legendary Total Power',
             fontsize=16, weight="bold")

fig, ax = plt.subplots()
fig.set_size_inches(10, 5)
sns.boxplot(x='Generation', y='Total_Power', data=pokemon, ax=ax)
ax.set_title('Total Power boxplot by Generations', fontsize=16, weight="bold")

# --------------------------------------------------------------------------
# Combat results
# --------------------------------------------------------------------------
fig, ax = plt.subplots()
fig.set_size_inches(10, 5)
first_win = (combats.First_pokemon == combats.Winner).sum()
second_win = (combats.Second_pokemon == combats.Winner).sum()
sns.barplot(x=["First Wins", "Second Wins"], y=[first_win, second_win], ax=ax)
ax.set_title('Battle winners count by first attacker order',
             fontsize=16, weight="bold")

first_combats = combats['First_pokemon'].value_counts().sort_index()
second_combats = combats['Second_pokemon'].value_counts().sort_index()
winner_combats = combats['Winner'].value_counts().sort_index()

# Align all three counts on every Pokemon that fought in either role.  Adding
# the raw Series would give NaN for a Pokemon seen in only one role, and
# reindexing winners on first_combats alone would drop the others.
fighters = first_combats.index.union(second_combats.index)
first_combats = first_combats.reindex(fighters, fill_value=0).rename("First_pokemon")
second_combats = second_combats.reindex(fighters, fill_value=0).rename("Second_pokemon")
winner_combats = winner_combats.reindex(fighters, fill_value=0).rename("Winner")

battle_stats = pd.concat([first_combats, second_combats, winner_combats],
                         axis=1)
battle_stats["total_combats"] = total_combats = first_combats + second_combats
battle_stats["win_pct"] = winner_combats / total_combats
battle_stats.head()

# Aligned on the Pokemon id; species with no recorded combat get NaN.
pokemon["win_pct"] = battle_stats.win_pct
pokemon.head()

g = sns.jointplot(x="Total_Power", y="win_pct", data=pokemon, kind="reg",
                  line_kws={'color': 'green'})
g.fig.set_size_inches(8, 8)
g.fig.suptitle("Total Power vs Winning Percentage joinplot\nfor every known Pokemon species",
               fontsize=16, weight="bold", y=1.05)

# Winning percentage against each individual stat (2 x 3 grid).
fig, axes = plt.subplots(2, 3, figsize=(10, 10))
for i, col in enumerate(pokemon.columns[3:9]):
    ax = axes[i // 3, i % 3]
    sns.regplot(data=pokemon, ax=ax, y="win_pct", x=col,
                line_kws={"color": "g"},
                scatter_kws={"alpha": 0.2, "color": "r"})
    if i % 3 > 0:
        # Only the left-most column keeps its y label and tick labels.
        ax.set_ylabel("")
        ax.set_yticklabels([])
    ax.set_ylim((0, 1))
fig.suptitle("Winning Percentage vs Pokemon Stats pairgrid\nfor every known Pokemon species",
             fontsize=16, weight="bold", y=0.95)

f, ax = plt.subplots(figsize=(10, 6))
g = sns.heatmap(
    pokemon[["HP", "Attack", "Defense", "Sp. Atk", "Sp. Def", "Speed",
             "Total_Power", "win_pct"]].corr(),
    annot=True, linewidths=.5, fmt='.1f', ax=ax,
)
plt.suptitle("Pokemon Mean Stats and Wining Ratio\nCorrelation Heatmap",
             fontsize=16, weight="bold", y=1)

# Winning percentage distribution per main type (3 x 6 grid).
fig, axes = plt.subplots(3, 6, figsize=(10, 12))
pkt_win = pokemon[["Type 1", "win_pct"]]
pkt_win = pkt_win.sort_values("Type 1")
grouped = pkt_win.groupby('Type 1')
for i, (key, group) in enumerate(grouped):
    ax = axes[i // 6, i % 6]
    sns.boxplot(x="Type 1", y="win_pct", color="white", data=group, ax=ax)
    # Groups iterate in sorted type order, matching pk_colors.
    sns.swarmplot(y='win_pct', color=pk_colors[i], data=group, ax=ax)
    if i % 6 > 0:
        ax.set_ylabel("")
        ax.set_yticklabels([])
    ax.set_xlabel(key)
    ax.set_ylim((0, 1))
plt.suptitle("Winning Percentage Distribution by Main Pokemon Type",
             fontsize=16, weight="bold", y=0.92)

pokemon[pokemon.win_pct >= 0.95]