In [244]:
%matplotlib inline

import pandas as pd
import numpy as np
import scipy
import sklearn
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
In [4]:
names = ['name','landmass','zone','area','population','language','religion','bars','stripes','colours','red','green','blue','gold','white','black','orange','mainhue','circles','crosses','saltires','quarters','sunstars','crescent','triangle','icon','animate','text','topleft','botright']
dataset = pd.read_csv('flag.data',names=names,usecols=[*range(len(names))])

print(dataset.head(5))
             name  landmass  zone  area  population  language  religion  bars  \
0     Afghanistan         5     1   648          16        10         2     0   
1         Albania         3     1    29           3         6         6     0   
2         Algeria         4     1  2388          20         8         2     2   
3  American-Samoa         6     3     0           0         1         1     0   
4         Andorra         3     1     0           0         6         0     3   

   stripes  colours  ...  saltires  quarters  sunstars  crescent  triangle  \
0        3        5  ...         0         0         1         0         0   
1        0        3  ...         0         0         1         0         0   
2        0        3  ...         0         0         1         1         0   
3        0        5  ...         0         0         0         0         1   
4        0        3  ...         0         0         0         0         0   

   icon  animate text  topleft  botright  
0     1        0    0    black     green  
1     0        1    0      red       red  
2     0        0    0    green     white  
3     1        1    0     blue       red  
4     0        0    0     blue       red  

[5 rows x 30 columns]
In [22]:
#all possible religious tags
religions = ['catholic','other christian','muslim','buddhist',
             'hindu','ethnic','marxist','other']

#sum of flags tagged by respective religion
totalFlags = []

for religion in religions:
    index = religions.index(religion)
    religionCount = sum(dataset['religion'] == index)
    totalFlags.append(religionCount)
    
#reshape into dataframe
totalFlags = pd.DataFrame(np.array(totalFlags).reshape(1,len(religions)))
totalFlags.columns = religions
In [81]:
sns.set(style="whitegrid")
ax = sns.barplot(data=totalFlags)
ax.set(xlabel='religions', ylabel='amount')
sns.set(rc={'figure.figsize':(15,10)})
sns.set_palette(sns.hls_palette(8, l=.3, s=.8))
In [55]:
#we only care about these columns
columns = ['mainhue','bars','stripes','sunstars']
plotData = dataset[columns]
plotData.columns = columns

plotData
In [62]:
#represent the flag colors on plot
flagColors = ['green','red','blue','gold','white','orange','black','brown']
sns.set_palette(flagColors)

sns.relplot(x="stripes", y="sunstars", hue="mainhue", size="bars",sizes=(100, 500), 
            alpha=0.9,height=6, data=plotData, palette=flagColors)
Out[62]:
<seaborn.axisgrid.FacetGrid at 0x7feedb6a2908>
In [77]:
landMasses = ['N. America','S. America','Europe','Africa','Asia','Oceania']
columns = ['landmass','bars','stripes','circles','crosses','saltires','quarters','sunstars',
          'crescent','triangle','icon','animate','text']
flagFeatsByLandmass = dataset[columns]

for i in range(1 , len(landMasses) + 1):
    landMassLabel = landMasses[i - 1]
    flagFeatsByLandmass.loc[flagFeatsByLandmass['landmass'] == i,'landmass'] = landMassLabel
    
flagFeatsByLandmass = pd.melt(flagFeatsByLandmass,id_vars="landmass",
                              var_name="feats",value_name="amount")
flagFeatsByLandmass
Out[77]:
landmass feats amount
0 Asia bars 0
1 Europe bars 0
2 Africa bars 2
3 Oceania bars 0
4 Europe bars 3
5 Africa bars 0
6 N. America bars 0
7 N. America bars 0
8 S. America bars 0
9 S. America bars 0
10 Oceania bars 0
11 Europe bars 0
12 N. America bars 0
13 Asia bars 0
14 Asia bars 0
15 N. America bars 3
16 Europe bars 3
17 N. America bars 0
18 Africa bars 0
19 N. America bars 0
20 Asia bars 0
21 S. America bars 0
22 Africa bars 0
23 S. America bars 0
24 N. America bars 0
25 Asia bars 0
26 Europe bars 0
27 Africa bars 0
28 Asia bars 0
29 Africa bars 0
... ... ... ...
2298 Africa text 0
2299 Europe text 0
2300 Europe text 0
2301 Asia text 0
2302 Asia text 0
2303 Africa text 0
2304 Asia text 0
2305 Africa text 0
2306 Oceania text 0
2307 S. America text 0
2308 Africa text 0
2309 Asia text 0
2310 N. America text 0
2311 Oceania text 0
2312 Asia text 0
2313 Africa text 0
2314 Europe text 0
2315 S. America text 0
2316 N. America text 1
2317 N. America text 0
2318 Asia text 0
2319 Oceania text 0
2320 Europe text 0
2321 S. America text 0
2322 Asia text 0
2323 Oceania text 0
2324 Europe text 0
2325 Africa text 0
2326 Africa text 0
2327 Africa text 0

2328 rows × 3 columns

In [78]:
sns.set_palette(sns.hls_palette(8, l=.3, s=.8))
g = sns.factorplot(x='amount', y='feats', hue='landmass', data=flagFeatsByLandmass, kind='bar',
                  size=10,aspect=2,ci=None)
In [166]:
landFeatsColor = dataset.drop(columns=['name','zone','area','population','language','religion',
                                     'red','green','blue','gold','white','black','orange',
                                      'topleft','botright','colours','bars','stripes',
                                      'crosses','saltires','circles','quarters','sunstars',
                                      'crescent','triangle','icon','animate','text'])

for i in range(1,7):
    landFeatsColor.loc[landFeatsColor['landmass'] == i,'landmass'] = landMasses[i - 1]


colors = ['red','green','blue','gold','white','black','orange']

mainColorsByLandmass = []

for land in landMasses:
    landMainhue = []
    landmass = landFeatsColor[landFeatsColor['landmass'] == land]

    for color in colors:
        landMainhue.append(sum(landmass['mainhue'] == color))
    
    mainColorsByLandmass.append(landMainhue)


mainColorsByLandmass = pd.DataFrame(np.array(mainColorsByLandmass).reshape(6,len(colors)))
mainColorsByLandmass.columns = colors
mainColorsByLandmass['landmass'] = landMasses
mainColorsByLandmass = pd.melt(mainColorsByLandmass,id_vars="landmass",
                               var_name="mainhue",value_name="amount")

h = sns.factorplot(x='landmass', y='amount', hue='mainhue', data=mainColorsByLandmass, 
                   kind='bar',size=10,aspect=2,ci=None)
In [137]:
land1 = dataset[dataset['landmass'] == 1]

#bars, stripes, colorDiff, crosses, saltires, quarters, sunstars, icon
columns = ['red','green','blue','gold','white','black','orange']
flagColors = []

for color in columns:
    flagColors.append(sum(land1[color]))

flagColors = pd.DataFrame(np.array(flagColors).reshape(1,len(columns)))
flagColors.columns = columns

flatui = ["#e50000", "#15b01a", "#0343df", "#dbb40c", "#ffffff", "#000000", "#f97306"]
sns.set_palette(flatui)

ax = sns.barplot(data=flagColors)
ax.set(xlabel='colors', ylabel='total')
sns.set(rc={'figure.figsize':(13,3)})
In [202]:
landMasses = ['N. America','S. America','Europe','Africa','Asia','Oceania']
labels = ['circles','crosses','saltires','quarters','crescent',
          'triangle','icon','animate','text']

radarData = []

for i in range(1 , len(landMasses) + 1):
    land = dataset[dataset['landmass'] == i]

    flagFeatures = []

    for feat in labels:
        flagFeatures.append(np.mean(land[feat]))
    
    radarData.append(flagFeatures)

radarData = pd.DataFrame(np.array(radarData).reshape(len(landMasses), len(labels)))
radarData = radarData.T
radarData.columns = landMasses

radarData
Out[202]:
N. America S. America Europe Africa Asia Oceania
0 0.193548 0.176471 0.028571 0.115385 0.358974 0.15
1 0.225806 0.058824 0.285714 0.038462 0.025641 0.40
2 0.193548 0.058824 0.028571 0.057692 0.025641 0.30
3 0.193548 0.411765 0.028571 0.057692 0.102564 0.40
4 0.000000 0.000000 0.000000 0.076923 0.179487 0.00
5 0.258065 0.117647 0.028571 0.134615 0.102564 0.25
6 0.290323 0.176471 0.257143 0.173077 0.333333 0.30
7 0.419355 0.176471 0.114286 0.134615 0.153846 0.30
8 0.161290 0.176471 0.028571 0.038462 0.102564 0.05
In [239]:
def radarPlot(row,color,land,labels):
    angles = np.linspace(0, 2*np.pi, len(labels), endpoint=False)
    angles = np.concatenate((angles,[angles[0]]))

    ax = plt.subplot(2,3,row+1, polar=True)
    
    ax.set_theta_offset(np.pi / 2)
    
    plt.xticks(angles[:-1], labels, color='grey', size=8)

    # Draw ylabels
    ax.set_rlabel_position(0)
    plt.yticks([0.10,0.20,0.30,0.40], 
               ["0.10","0.20","0.30","0.40"], color="grey", size=7)

    plt.ylim(0,0.50)

    stats = radarData[land].values
    stats = np.concatenate((stats,[stats[0]]))

    ax.plot(angles, stats, color=color,linewidth=2,linestyle='solid',label=land)
    ax.fill(angles, stats, color=color, alpha=0.4)

    title = land
    plt.title(title, size=11, color=color, y=1.1)
In [240]:
plotDpi=96
plt.figure(figsize=(1600/plotDpi, 1000/plotDpi), dpi=plotDpi)

colors = ['royalblue','navy','goldenrod','olivedrab','darkred','limegreen']

i = 0
for land in landMasses:
    color = colors[i]
    radarPlot(i,color,land,labels)
    i+=1
In [242]:
parallel_coordinates(radarData, 'landmass', colormap=plt.get_cmap("Set2"))
plt.show()
-------------------------------------------------------------------------
ModuleNotFoundError                     Traceback (most recent call last)
<ipython-input-242-9fb552c3ab4e> in <module>
----> 1 from pandas.tools.plotting import parallel_coordinates
      2 parallel_coordinates(radarData, 'landmass', colormap=plt.get_cmap("Set2"))
      3 plt.show()

ModuleNotFoundError: No module named 'pandas.tools'
In [ ]: