# Import required modules
import pandas as pd
from ggplot import *
%matplotlib inline
# Show up to 1000 rows when displaying a DataFrame. The option key is spelled
# out in full: pandas resolves abbreviated keys by pattern matching, which can
# break if a similarly named option is added in a later release.
pd.set_option('display.max_rows', 1000)
# Show up to 50 columns when displaying a DataFrame (this caps the number of
# columns shown, not the width of each column)
pd.set_option('display.max_columns', 50)
# Read the battles CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer='5kings_battles_v1.csv')
# Preview the first five rows
df.head(n=5)
name | year | battle_number | attacker_king | defender_king | attacker_1 | attacker_2 | attacker_3 | attacker_4 | defender_1 | defender_2 | defender_3 | defender_4 | attacker_outcome | battle_type | major_death | major_capture | attacker_size | defender_size | attacker_commander | defender_commander | summer | location | region | note | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Battle of the Golden Tooth | 298 | 1 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Tully | NaN | NaN | NaN | win | pitched battle | 1 | 0 | 15000 | 4000 | Jaime Lannister | Clement Piper, Vance | 1 | Golden Tooth | The Westerlands | NaN |
1 | Battle at the Mummer's Ford | 298 | 2 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Baratheon | NaN | NaN | NaN | win | ambush | 1 | 0 | NaN | 120 | Gregor Clegane | Beric Dondarrion | 1 | Mummer's Ford | The Riverlands | NaN |
2 | Battle of Riverrun | 298 | 3 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | NaN | NaN | NaN | Tully | NaN | NaN | NaN | win | pitched battle | 0 | 1 | 15000 | 10000 | Jaime Lannister, Andros Brax | Edmure Tully, Tytos Blackwood | 1 | Riverrun | The Riverlands | NaN |
3 | Battle of the Green Fork | 298 | 4 | Robb Stark | Joffrey/Tommen Baratheon | Stark | NaN | NaN | NaN | Lannister | NaN | NaN | NaN | loss | pitched battle | 1 | 1 | 18000 | 20000 | Roose Bolton, Wylis Manderly, Medger Cerwyn, H... | Tywin Lannister, Gregor Clegane, Kevan Lannist... | 1 | Green Fork | The Riverlands | NaN |
4 | Battle of the Whispering Wood | 298 | 5 | Robb Stark | Joffrey/Tommen Baratheon | Stark | Tully | NaN | NaN | Lannister | NaN | NaN | NaN | win | ambush | 1 | 1 | 1875 | 6000 | Robb Stark, Brynden Tully | Jaime Lannister | 1 | Whispering Wood | The Riverlands | NaN |
# Preview the last five rows
df.tail(n=5)
name | year | battle_number | attacker_king | defender_king | attacker_1 | attacker_2 | attacker_3 | attacker_4 | defender_1 | defender_2 | defender_3 | defender_4 | attacker_outcome | battle_type | major_death | major_capture | attacker_size | defender_size | attacker_commander | defender_commander | summer | location | region | note | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
33 | Second Seige of Storm's End | 300 | 34 | Joffrey/Tommen Baratheon | Stannis Baratheon | Baratheon | NaN | NaN | NaN | Baratheon | NaN | NaN | NaN | win | siege | 0 | 0 | NaN | 200 | Mace Tyrell, Mathis Rowan | Gilbert Farring | 0 | Storm's End | The Stormlands | NaN |
34 | Siege of Dragonstone | 300 | 35 | Joffrey/Tommen Baratheon | Stannis Baratheon | Baratheon | NaN | NaN | NaN | Baratheon | NaN | NaN | NaN | win | siege | 0 | 0 | 2000 | NaN | Loras Tyrell, Raxter Redwyne | Rolland Storm | 0 | Dragonstone | The Stormlands | NaN |
35 | Siege of Riverrun | 300 | 36 | Joffrey/Tommen Baratheon | Robb Stark | Lannister | Frey | NaN | NaN | Tully | NaN | NaN | NaN | win | siege | 0 | 0 | 3000 | NaN | Daven Lannister, Ryman Fey, Jaime Lannister | Brynden Tully | 0 | Riverrun | The Riverlands | NaN |
36 | Siege of Raventree | 300 | 37 | Joffrey/Tommen Baratheon | Robb Stark | Bracken | Lannister | NaN | NaN | Blackwood | NaN | NaN | NaN | win | siege | 0 | 1 | 1500 | NaN | Jonos Bracken, Jaime Lannister | Tytos Blackwood | 0 | Raventree | The Riverlands | NaN |
37 | Siege of Winterfell | 300 | 38 | Stannis Baratheon | Joffrey/Tommen Baratheon | Baratheon | Karstark | Mormont | Glover | Bolton | Frey | NaN | NaN | NaN | NaN | NaN | NaN | 5000 | 8000 | Stannis Baratheon | Roose Bolton | 0 | Winterfell | The North | NaN |
# Tally how many rows (battles) fall in each year
df.year.value_counts()
299 20 300 11 298 7 dtype: int64
# Bar chart of the number of battles recorded in each region
(
    df['region']
    .value_counts()
    .plot(kind='bar')
)
<matplotlib.axes.AxesSubplot at 0x10c33e810>
# Bar chart of how often each attacker outcome occurs
(
    df['attacker_outcome']
    .value_counts()
    .plot(kind='bar')
)
<matplotlib.axes.AxesSubplot at 0x10c376710>
# Bar chart of the number of battles of each battle type
(
    df['battle_type']
    .value_counts()
    .plot(kind='bar')
)
<matplotlib.axes.AxesSubplot at 0x10c46eb10>
# Bar chart of the number of battles in which each king was the attacker
(
    df['attacker_king']
    .value_counts()
    .plot(kind='bar')
)
<matplotlib.axes.AxesSubplot at 0x10c549350>
# Bar chart of the number of battles in which each king was the defender
(
    df['defender_king']
    .value_counts()
    .plot(kind='bar')
)
<matplotlib.axes.AxesSubplot at 0x10c90c7d0>
# Total number of battles each king took part in, as attacker or as defender.
# Series.add with fill_value=0 treats a king missing from one of the two
# tallies as having zero battles on that side, so the total keeps the real
# count from the other side. (Plain `+` yields NaN for such kings, and
# patching that with fillna(1) is only correct if the king fought exactly one
# battle.)
war_action = df['attacker_king'].value_counts().add(
    df['defender_king'].value_counts(), fill_value=0)
# Bar chart of the combined per-king battle counts
war_action.plot(kind='bar')
<matplotlib.axes.AxesSubplot at 0x10c9c8610>
# Scatter plot of attacker army size against defender army size, one point
# per battle, coloured by the attacker's outcome. Battles missing any of the
# three plotted values are dropped before plotting.
ggplot(
    aes(x='attacker_size', y='defender_size', colour='attacker_outcome'),
    data=df.dropna(subset=['attacker_size', 'defender_size', 'attacker_outcome'])
) + geom_point()
<ggplot: (281727157)>