# Import required modules
from ggplot import *
%matplotlib inline
import pandas as pd
# Battle dataset, written one record per battle and then pivoted into columns.
# Field order here fixes the position of each value inside every record below.
battle_fields = (
    'battle', 'season', 'terrain', 'weather', 'victor',
    'deaths_attacker', 'deaths_defender',
    'wounded_attacker', 'wounded_defender',
    'soldiers_attacker', 'soldiers_defender',
    'year',
)
battle_records = [
    ('Battle of Two Forks', 'winter', 'mountains', 'rain', 'attacker', 425, 423, 41, 14, 2532, 37235, 1945),
    ('Battle of Cochise', 'fall', 'mountains', 'rain', 'defender', 242, 264, 214, 1424, 6346, 2523, 1956),
    ('Battle of Bells', 'fall', 'mountains', 'cloudy', 'attacker', 323, 1231, 131, 131, 3341, 2133, 1964),
    ('Battle of the Beach', 'fall', 'beach', 'sunny', 'defender', 223, 23, 12, 12, 6732, 1245, 1969),
    ('Battle of Flatlander', 'spring', 'beach', 'rain', 'attacker', 783, 23, 123, 34, 12563, 2671, 1971),
    ('Battle of Middorin', 'winter', 'plains', 'rain', 'defender', 436, 42, 124, 124, 2356, 7832, 1981),
    ('Battle of Massai', 'summer', 'plains', 'sunny', 'attacker', 324, 124, 264, 1124, 253, 2622, 1982),
    ('Battle of Monogrop', 'winter', 'city', 'cloudy', 'defender', 3321, 631, 311, 1431, 5277, 3331, 1992),
    ('Battle of ', 'summer', 'city', 'rain', 'attacker', 262, 232, 132, 122, 2732, 2522, 1999),
    ('Battle of Sticks', 'summer', 'city', 'sunny', 'defender', 843, 213, 623, 2563, 6278, 26773, 2004),
]
# Column-oriented mapping: each field name -> list of that field's values,
# in the same order as the records above.
data = {
    field: [record[position] for record in battle_records]
    for position, field in enumerate(battle_fields)
}
df = pd.DataFrame(data)
# Bare expression so the notebook renders the frame as the cell output
df
| | battle | deaths_attacker | deaths_defender | season | soldiers_attacker | soldiers_defender | terrain | victor | weather | wounded_attacker | wounded_defender | year |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Battle of Two Forks | 425 | 423 | winter | 2532 | 37235 | mountains | attacker | rain | 41 | 14 | 1945 |
1 | Battle of Cochise | 242 | 264 | fall | 6346 | 2523 | mountains | defender | rain | 214 | 1424 | 1956 |
2 | Battle of Bells | 323 | 1231 | fall | 3341 | 2133 | mountains | attacker | cloudy | 131 | 131 | 1964 |
3 | Battle of the Beach | 223 | 23 | fall | 6732 | 1245 | beach | defender | sunny | 12 | 12 | 1969 |
4 | Battle of Flatlander | 783 | 23 | spring | 12563 | 2671 | beach | attacker | rain | 123 | 34 | 1971 |
5 | Battle of Middorin | 436 | 42 | winter | 2356 | 7832 | plains | defender | rain | 124 | 124 | 1981 |
6 | Battle of Massai | 324 | 124 | summer | 253 | 2622 | plains | attacker | sunny | 264 | 1124 | 1982 |
7 | Battle of Monogrop | 3321 | 631 | winter | 5277 | 3331 | city | defender | cloudy | 311 | 1431 | 1992 |
8 | Battle of | 262 | 232 | summer | 2732 | 2522 | city | attacker | rain | 132 | 122 | 1999 |
9 | Battle of Sticks | 843 | 213 | summer | 6278 | 26773 | city | defender | sunny | 623 | 2563 | 2004 |
# Line plot of the defender's wounded count ('wounded_defender') against year
(
    ggplot(df, aes(x='year', y='wounded_defender'))
    + geom_line()
)
<ggplot: (278978529)>
# Remove the text-valued columns so only the numeric ones (and year) remain
text_columns = ['battle', 'season', 'terrain', 'victor', 'weather']
df = df.drop(text_columns, axis=1)
# Reshape to long form: one row per (year, variable, value) triple
long_df = pd.melt(df, id_vars=['year'])
# One line per variable, plotted by year (note that the actual values
# won't make sense in this example)
(
    ggplot(long_df, aes(x='year', y='value', color='variable'))
    + geom_line()
)
<ggplot: (280215789)>
# Write the current contents of df to a UTF-8 encoded CSV file
csv_path = 'df.csv'
df.to_csv(csv_path, encoding='utf-8')