# Exploratory look at the battles dataset: load the CSV, inspect it, and plot
# how often each year, region, outcome, battle type and king appears.
# NOTE(review): this looks like a flattened notebook export -- presumably each
# section below was its own cell (so each plot got its own figure); confirm.

# Import required modules
import pandas as pd
from ggplot import *

# Render plots inline when running under IPython/Jupyter. The
# `%matplotlib inline` magic is not valid Python syntax in a plain .py file,
# so call the equivalent shell API and skip it when IPython is not present.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# Show up to 1000 rows when displaying a DataFrame
pd.set_option('display.max_rows', 1000)

# Show up to 50 columns when displaying a DataFrame
# (this is the number of columns, not the column width)
pd.set_option('display.max_columns', 50)

# Load the dataset
df = pd.read_csv('5kings_battles_v1.csv')

# View the top five observations
df.head()

# View the bottom five observations
df.tail()

# Count the number of observations for each year
df['year'].value_counts()

# Count the number of observations for each value, then make a bar plot
df['region'].value_counts().plot(kind='bar')

df['attacker_outcome'].value_counts().plot(kind='bar')

df['battle_type'].value_counts().plot(kind='bar')

df['attacker_king'].value_counts().plot(kind='bar')

df['defender_king'].value_counts().plot(kind='bar')

# Total number of battles each king took part in, as attacker or defender.
# A plain `+` aligns on the index and yields NaN for a king that appears in
# only one of the two columns; filling that NaN with 1 would misreport the
# king's real count. `add(fill_value=0)` treats the missing side as zero.
attacker_counts = df['attacker_king'].value_counts()
defender_counts = df['defender_king'].value_counts()
war_action = attacker_counts.add(defender_counts, fill_value=0)
war_action.plot(kind='bar')

# Create a ggplot scatter plot of attacker_size against defender_size,
# keeping only rows where both sizes and the outcome are present, with the
# color of each dot being determined by the outcome of the battle
complete_battles = df.dropna(
    subset=['attacker_size', 'defender_size', 'attacker_outcome']
)
(
    ggplot(
        aes(x='attacker_size', y='defender_size', colour='attacker_outcome'),
        data=complete_battles,
    )
    + geom_point()
)