#!/usr/bin/env python
# coding: utf-8

# In[1137]:


import pandas as pd, numpy as np
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')
from jupyterthemes import jtplot
import matplotlib as mpl
# onedork | grade3 | oceans16 | chesterish | monokai | solarizedl | solarizedd


# In[1138]:


# Model data: the previously simulated days, used here only as a reference
# for the target output format.
df=pd.read_csv('ex/data/days-simulated-v2.tsv')


# In[1139]:


#target structure: activity, duration, activity, duration, ...
df.head()


# In[1140]:


# Pick one simulated day and split its single comma-separated cell into tokens.
SAMPLE_ROW=430
a=df.loc[SAMPLE_ROW].values[0].split(',')


# In[1141]:


# Total duration of the sample day (minutes): durations sit at the odd
# positions of the token list. Slicing replaces range(len(a)/2), whose float
# bound raises TypeError on Python 3.
sum(int(minutes) for minutes in a[1::2])


# In[1142]:


# Read the three survey export parts with identical options.
survey_parts = [pd.read_csv(path, encoding = 'utf8')
                for path in ('ex/1.csv', 'ex/2.csv', 'ex/3.csv')]
df1, df2, df3 = survey_parts


# In[1143]:


# All parts share the same headers, so the first row of parts 2 and 3 is
# dropped before stacking (per the original note: no need for headers twice).
df = pd.concat([df1] + [part[1:] for part in (df2, df3)])


# In[1144]:


# The merged export is cut into four frames purely by column position.
# reset_index() keeps the old row labels as a leading 'index' column.

#create weekday dataframe
weekday_columns = df.columns[9:489]
hkoz = df[weekday_columns].reset_index()


# In[1145]:


#create weekend dataframe
weekend_columns = df.columns[489:969]
hetv = df[weekend_columns].reset_index()


# In[1146]:


#create descriptive stats dataframe
descriptive_columns = df.columns[969:]
desc = df[descriptive_columns].reset_index()


# In[1147]:


#create survey metadata dataframe
metadata_columns = df.columns[2:4]
time = df[metadata_columns].reset_index()


# In[4]:


# Activity names grouped into bins. The list holds 17 groups (positions 0-16),
# not 16; a name's group id is the position of its inner list.
# The non-u literals spell the names as escaped UTF-8 byte sequences.
# NOTE(review): `activities` is assigned again in the next cell before anything
# reads it, so this version is effectively unused when run top to bottom.
# NOTE(review): the two versions are not identical. Here 'Vend\xc3\xa9gl\xc5\x91'
# and 'Edz\xc5\x91terem' use \xc5\x91 (U+0151), while the later list spells the
# same names with U+00F5. Confirm which spelling matches the survey column names.
activities=[['Alv\xc3\xa1s'],
['Zuhany / Mosd\xc3\xb3'],
['\xc3\x89tkez\xc3\xa9s', '\xc3\x89tterem/Vend\xc3\xa9gl\xc5\x91'],
[u'Munka (irodai)', 'Munka (k\xc3\xa9tkezi)'],
[u'Internet', u'Telefon/Chat/Facebook'],
['V\xc3\xa1s\xc3\xa1rl\xc3\xa1s'],
['Vall\xc3\xa1sgyakorl\xc3\xa1s','\xc3\x96nk\xc3\xa9ntess\xc3\xa9g'],
[u'TV/Film', u'Mozi'],
['Olvas\xc3\xa1s', '\xc3\x9ajs\xc3\xa1g/Keresztrejtv\xc3\xa9ny'],
['H\xc3\xa1zimunka/Gyerekfel\xc3\xbcgyelet'],
['Hivatalos elint\xc3\xa9znival\xc3\xb3k'],
[u'Sport', 'Edz\xc5\x91terem/Sz\xc3\xa9ps\xc3\xa9gszalon'],
['Egy\xc3\xa9b Hobby', 'PC j\xc3\xa1t\xc3\xa9k', 'Kert\xc3\xa9szked\xc3\xa9s/Bark\xc3\xa1csol\xc3\xa1s', 'Rokonl\xc3\xa1togat\xc3\xa1s',  'M\xc3\xa1s'],
['Tanul\xc3\xa1s',  'Mag\xc3\xa1n\xc3\xb3ra'],
['Sz\xc3\xb3rakoz\xc3\xb3hely/K\xc3\xa1v\xc3\xa9z\xc3\xb3/Pub'],
['S\xc3\xa9ta/Kutyas\xc3\xa9t\xc3\xa1ltat\xc3\xa1s', 'Term\xc3\xa9szet/Kir\xc3\xa1ndul\xc3\xa1s'],
['Utaz\xc3\xa1s/Vezet\xc3\xa9s']]


# In[5]:


# Activity names as unicode literals, grouped into bins; the position of each
# inner list is the group id used everywhere downstream.
activities = [
    [u'Alvás'],
    [u'Zuhany / Mosdó'],
    [u'Étkezés', u'Étterem/Vendéglõ'],
    [u'Munka (irodai)', u'Munka (kétkezi)'],
    [u'Internet', u'Telefon/Chat/Facebook'],
    [u'Vásárlás'],
    [u'Vallásgyakorlás', u'Önkéntesség'],
    [u'TV/Film', u'Mozi'],
    [u'Olvasás', u'Újság/Keresztrejtvény'],
    [u'Házimunka/Gyerekfelügyelet'],
    [u'Hivatalos elintéznivalók'],
    [u'Sport', u'Edzõterem/Szépségszalon'],
    [u'Egyéb Hobby', u'PC játék', u'Kertészkedés/Barkácsolás', u'Rokonlátogatás', u'Más'],
    [u'Tanulás', u'Magánóra'],
    [u'Szórakozóhely/Kávézó/Pub'],
    [u'Séta/Kutyasétáltatás', u'Természet/Kirándulás'],
    [u'Utazás/Vezetés'],
]


# In[6]:


# Reverse lookup: activity name -> id of the group that contains it.
actidict = dict(
    (name, group_id)
    for group_id, group in enumerate(activities)
    for name in group
)


# In[16]:


# Show the binning: one output line per group, member names separated by a
# single space. print(...) with one argument behaves the same on Python 2 and 3,
# unlike the `print x,` statement form, which is a SyntaxError on Python 3.
for group in activities:
    print(' '.join(group))


# In[1152]:


def _slot_label(start_minute, length=90):
    """Format the slot starting `start_minute` minutes after midnight as 'HH:MM-HH:MM'.

    The end time wraps around midnight.
    """
    end_minute = (start_minute + length) % (24 * 60)
    return '%02d:%02d-%02d:%02d' % (divmod(start_minute, 60) + divmod(end_minute, 60))


# The 16 consecutive 90-minute survey slots, from 01:00 round to 01:00 next day.
timekeys = [_slot_label(60 + 90 * slot_number) for slot_number in range(16)]


# In[1153]:


# run only once: the frames are overwritten with their trimmed versions, so a
# second run would operate on frames whose first row is already gone.
def _promote_header_row(frame):
    """Rename the columns of `frame` from its row labelled 0 and strip that row.

    The column labels are replaced in place by the values of row 0. The result
    is the frame without its first row and without the column that ends up
    named 0 (presumably the one added by reset_index(); verify).
    """
    frame.columns = frame.loc[0].values
    return frame[1:].drop(0, axis=1)


hkoz = _promote_header_row(hkoz)
hetv = _promote_header_row(hetv)


# In[1154]:


def _answered_columns(frame):
    """Map every row label of `frame` to the column names that hold a value.

    A cell counts as empty when its string form, lower-cased, equals 'nan'.
    Column order is preserved.
    """
    answered = {}
    for row_label in frame.index:
        row = frame.loc[row_label]
        answered[row_label] = [
            column
            for column, cell in zip(row.index, row.values)
            if str(cell).lower() != 'nan'
        ]
    return answered


#extract and linearize weekday data from pandas dataframe
hkozdata = _answered_columns(hkoz)


# In[1155]:


#extract and linearize weekend data from pandas dataframe
hetvdata = _answered_columns(hetv)


# In[1156]:


# Walk-through of the conversion for one respondent (row label 1).

#create timematrix - timeslice:activity list
j=1
timematrix={}
for column in hkozdata[j]:
    # Column names look like '<activity> - <timeslice>'; split at the first '-'.
    dash=column.find('-')
    activity=column[:dash-1]
    timeslice=column[dash+2:]
    timematrix.setdefault(timeslice,[]).append(actidict[activity])


# In[1157]:


#create correct timeslice order to start day at 04:00
# The two earliest sorted keys are rotated to the end. sorted(dict) replaces
# np.sort(dict.keys()), which cannot sort a Python 3 keys view.
# NOTE(review): no sleep fill-in is done here, so if this respondent skipped
# one of the first two slots the rotation will not land on 04:00.
parseorder=np.roll(sorted(timematrix),-2)


# In[1158]:


#create output list, with shared timeslots
output=[]
for key in parseorder:
    helper=timematrix[key]
    np.random.shuffle(helper)  # random order decides which activities survive the cut
    output.append(helper[:3]) #max 3 activities within 90 minutes


# In[1159]:


print(output)


# In[1160]:


#create output CSV list: activity, duration, activity, duration, ...
output2=[]
fixed=90 # survey 90 min timeslices are fixed
for slot in output:
    for act in slot:
        output2.append(act)
        # Activities sharing a slot split it evenly; // keeps whole minutes on
        # both Python 2 and Python 3.
        output2.append(fixed//len(slot))
print(output2)


# In[1161]:


#minutes in perfect day (durations are the odd positions)
sum(output2[1::2])


# In[1162]:


#create output CSV list: activity, duration, activity, duration, ... including pruning
output2=[]
fixed=90 # survey 90 min timeslices are fixed
current=None # activity of the entry currently being extended
for slot in output:
    for act in slot:
        if act!=current:
            current=act
            output2.append(act)
            # Jitter the first share by -10..+9 minutes. uniform(0, 20) is
            # spelled out because uniform(20) sets low=20 and leaves high at
            # its default 1.0, which numpy documents as undefined behavior.
            output2.append(fixed//len(slot)-10+int(np.random.uniform(0,20))) #randomize a bit better movement
        else:
            # Same activity continues: extend the previous duration instead.
            output2[-1]+=fixed//len(slot)
print(output2)


# In[1163]:


sum(output2[1::2])


# Parse all data

# In[1164]:


# instances: how many randomized day sequences are generated per respondent.
# rrange:    upper bound handed to np.random.uniform for the duration jitter;
#            twice this value is the rolling-mean window of the plots.
instances, rrange = 2, 60


# In[1165]:


#fill method 1:
#assume sleeping if nothing was clicked in
#fill method 2:
#disregard incomplete data


# In[1166]:


def build_day_sequences(daydata, n_instances, slot_minutes=90, max_per_slot=3):
    """Turn per-respondent answers into 'activity,duration,...' day strings.

    Reads the module-level `actidict` (activity name -> group id) and
    `timekeys` (the expected timeslice labels).

    daydata      -- dict: respondent -> list of '<activity> - <timeslice>' names
    n_instances  -- number of randomized sequences generated per respondent
    slot_minutes -- length of one survey timeslice in minutes
    max_per_slot -- at most this many activities are kept per timeslice

    Returns a list of strings, n_instances per respondent, each a
    comma-separated list alternating group id and duration in minutes.
    Raises KeyError for an activity name missing from `actidict`.
    """
    sequences=[]
    for respondent in daydata:
        #create timematrix - timeslice:activity list
        timematrix={}
        for column in daydata[respondent]:
            dash=column.find('-')
            activity=column[:dash-1]
            timeslice=column[dash+2:]
            timematrix.setdefault(timeslice,[]).append(actidict[activity])

        #fill up with sleep (group 0) wherever nothing was clicked in
        if len(timematrix)<len(timekeys):
            for t in timekeys:
                if t not in timematrix:
                    timematrix[t]=[0]

        #create correct timeslice order to start day at 04:00
        # sorted(dict) works on Python 2 and 3; np.sort(dict.keys()) does not.
        parseorder=np.roll(sorted(timematrix),-2)

        for _ in range(n_instances):
            #create output list, with shared timeslots
            slots=[]
            for key in parseorder:
                candidates=timematrix[key]
                np.random.shuffle(candidates)  # random pick and order per instance
                slots.append(candidates[:max_per_slot])

            #create output CSV list: activity, duration, activity, duration, ...
            day=[]
            current=None
            for slot in slots:
                for act in slot:
                    share=slot_minutes//len(slot)  # whole minutes on Python 2 and 3
                    if act!=current:
                        current=act
                        day.append(act)
                        day.append(share)
                    else:
                        # same activity continues across entries: merge durations
                        day[-1]+=share
            sequences.append(str(day)[1:-1].replace(' ',''))
    return sequences


def jitter_durations(sequences, max_shift):
    """Randomly move minutes between consecutive activities of each sequence.

    For each neighbouring pair a random whole number below `max_shift` is
    drawn and capped at the shorter of the two durations divided by 1.6. That
    many minutes move from the later activity to the earlier one, so the total
    length of the day is unchanged.

    Returns a new list of strings in the same format as the input.
    """
    jittered=[]
    for sequence in sequences:
        a=[int(token) for token in sequence.split(',')]
        # uniform(0, max_shift) is spelled out: uniform(max_shift) would set
        # low=max_shift and leave high at its default 1.0, which numpy
        # documents as undefined behavior.
        wanted=[int(np.random.uniform(0,max_shift)) for _ in range(len(a)//2-1)]
        for g in range(len(wanted)):
            toshift=int(min(min(a[g*2+1],a[(g+1)*2+1])/1.6,wanted[g]))
            a[g*2+1]+=toshift
            a[(g+1)*2+1]-=toshift
        jittered.append(str(a)[1:-1].replace(' ',''))
    return jittered


# In[1166]:


# weekdays
output4=build_day_sequences(hkozdata,instances)


# In[1167]:


output4b=jitter_durations(output4,rrange)


# In[1168]:


savedata=pd.DataFrame(output4b)
savedata.columns=['day']


# In[1169]:


savedata.to_csv('hkoz.csv',index=False)


# In[1170]:


# weekends
output5=build_day_sequences(hetvdata,instances)


# In[1171]:


output5b=jitter_durations(output5,rrange)


# In[1172]:


savedata=pd.DataFrame(output5b)
savedata.columns=['day']


# In[1173]:


savedata.to_csv('hetv.csv',index=False)


# In[1174]:


len(savedata)


# Plots

# In[1175]:


# One plot colour per activity group; the list position is the group id.
colors = [
    "#e0d400",  # 0
    "#1c8af9",  # 1
    "#51BC05",  # 2
    "#FF7F00",  # 3
    "#DB32A4",  # 4
    "#00CDF8",  # 5
    "#E63B60",  # 6
    "#8E5649",  # 7
    "#68c99e",  # 8
    "#a477c8",  # 9
    "#5C76EC",  # 10
    "#E773C3",  # 11
    "#799fd2",  # 12
    "#038a6c",  # 13
    "#cc87fa",  # 14
    "#ee8e76",  # 15
    "#bbbbbb",  # 16
]
# Same palette keyed by the group id written as a string.
colorsdict = dict((str(group_id), hexcode) for group_id, hexcode in enumerate(colors))


# In[1176]:


# (group id as string, short legend label, longer description), in display order.
_act_rows = [
    ("15", u"Kutya", u"Sétáltatás | Természet"),
    ("3", u"Meló", u"Munka (irodai | kétkezi)"),
    ("7", u"TV", u"TV | Film | Mozi"),
    ("5", u"Vásárlás", u"Vásárlás | Mall"),
    ("6", u"Áhítat", u"Vallásgyakorlás | Önkéntesség"),
    ("4", u"Net", u"Internet | Telefon | Facebook"),
    ("1", u"Zuhany", u"Zuhany | Mosdó"),
    ("8", u"Olvasás", u"Könyv | Újság | Keresztrejtvény"),
    ("9", u"Otthon", u"Házimunka | Gyerekfelügyelet"),
    ("0", u"Szundi", u"Alvás"),
    ("11", u"Sport", u"Edzés | Edzőterem | Szaladás"),
    ("12", u"Hobby", u"Kertészkedés | Barkácsolás | Rokonok"),
    ("2", u"Kaja", u"Étkezés | Étterem"),
    ("14", u"Sör", u"Kávézó | Pub | Szórakozóhely"),
    ("13", u"Suli", u"Tanulás | Magánóra"),
    ("10", u"Hivatal", u"Hivatalos elintéznivalók"),
    ("16", u"Úton", u"Utazás | Vezetés"),
]
act_codes = [
    {"index": code, "short": short, "desc": description}
    for code, short, description in _act_rows
]
# Legend text lookup: group id (string) -> short label.
labels = dict((entry["index"], entry["short"]) for entry in act_codes)


# In[1177]:


#duration: how long respondents took to fill in the survey
time['sd']=pd.to_datetime(time['Start Date'])
time['ed']=pd.to_datetime(time['End Date'])
# Compute the differences once (the original recomputed the whole subtraction
# for every element) and skip row 0, as before. A plain list is passed to hist
# so the result does not depend on the Series index.
fill_minutes=((time['ed']-time['sd'])[1:].dt.total_seconds()/60.0).tolist()

jtplot.style(theme='onedork',fscale=1.1, spines=False, grid=False, )
fig,ax=plt.subplots(1,1,figsize=(6,4))
# density= replaces the normed= keyword, which newer matplotlib removed
# (needs matplotlib >= 2.1); with unit-width bins the result is the same.
ax.hist(fill_minutes,range=[0,20],bins=list(range(20)),density=True,color='#ffcc00',alpha=0.8)
ax.set_xlabel(u"Kérdõív kitöltési ideje (perc)")
ax.set_ylabel(u"Válaszadók százaléka (%)")  # fixed label typo ("száazléka")
ax.set_title(u"A kérdõívre 7-8 percet szántatok")
def mjrFormatter(x, pos):
    """Tick formatter: show a 0-1 fraction as a whole-number percentage."""
    return str(int(x*100))
ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(mjrFormatter))
fig.tight_layout()
fig.savefig('1.png')
plt.show()


# In[1178]:


def mjrFormatter(x, pos):
    """Tick formatter: show a 0-1 fraction as a whole-number percentage."""
    return str(int(x*100))


def minute_distribution(sequences, half_window):
    """Per-minute share of each activity group across all day sequences.

    sequences   -- list of 'activity,duration,...' strings
    half_window -- the rolling mean uses a centred window of 2*half_window rows

    Each sequence is expanded to one group id per minute. Every minute column
    is then histogrammed over the 17 group ids and smoothed along the day.
    Prints the average expanded length as a sanity check.

    Returns a DataFrame with one row per minute and one column per group id.
    """
    mdata=[]
    mlen=[]
    for sequence in sequences:
        z=[int(token) for token in sequence.split(',')]
        data=[]
        for act,minutes in zip(z[0::2],z[1::2]):
            data.extend([act]*minutes)
        mdata.append(data)
        mlen.append(len(data))
    print(np.average(mlen))

    per_minute=pd.DataFrame(mdata)
    # density= replaces the removed normed= keyword; with unit-width bins the
    # values are the plain fractions per group.
    histdata=[np.histogram(per_minute[minute],bins=17,range=[-0.5,16.5],density=True)[0]
              for minute in per_minute.columns]
    raw=pd.DataFrame(histdata)
    # .rolling(...).mean() replaces pd.rolling_mean, which pandas removed.
    smooth=raw.rolling(2*half_window,center=True).mean()
    # The rolling mean leaves NaN at both ends. Pin the first and the last
    # minute to the unsmoothed first-minute distribution and interpolate.
    # NOTE(review): the last row also uses minute 0, as the original did,
    # presumably so the day wraps around; confirm.
    smooth.iloc[0]=raw.iloc[0].values
    smooth.iloc[-1]=raw.iloc[0].values
    return smooth.interpolate()


def _style_day_axis(ax, title):
    """Apply the shared x range, clock-time ticks, y label, title and % formatter."""
    ax.set_xlim(0,1440)
    ax.set_xticks([60,240,420,600,780,960,1140,1320])
    ax.set_xticklabels(["05:00","08:00","11:00","14:00","17:00","20:00","23:00","02:00"],fontsize=11)
    ax.set_ylabel(u"Tevékenységek eloszlása (%)")
    ax.set_title(title)
    ax.yaxis.set_major_formatter(mpl.ticker.FuncFormatter(mjrFormatter))


def plot_activity_lines(dist, title, filename, exclude=()):
    """Draw one line per activity group, save to `filename`, and show it.

    Groups whose id is in `exclude` are skipped. Uses the module-level
    `labels` and `colors`. Returns (fig, ax).
    """
    jtplot.style(theme='onedork',fscale=1.1, spines=False, grid=False, )
    fig,ax=plt.subplots(1,1,figsize=(6,4))
    for code in dist.columns:
        if code not in exclude:
            ax.plot(dist[code],label=labels[str(code)],color=colors[code],alpha=0.9,lw=2)
    _style_day_axis(ax,title)
    ax.legend(bbox_to_anchor=(1.25, 1.05),fontsize=9)
    fig.tight_layout()
    fig.savefig(filename)
    plt.show()
    return fig,ax


def plot_activity_stack(dist, title, filename):
    """Draw the groups as a stacked area chart, save to `filename`, and show it.

    Groups are stacked from the highest id upwards and the legend is reversed
    so it reads from group 0. Uses the module-level `labels` and `colors`.
    Returns (fig, ax).
    """
    jtplot.style(theme='onedork',fscale=1.1, spines=False, grid=False, )
    fig,ax=plt.subplots(1,1,figsize=(6,4))
    codes=list(dist.columns[::-1])
    ax.stackplot(dist.index,[dist[code] for code in codes],colors=colors[::-1],alpha=0.9,
                 labels=[labels[str(code)] for code in codes])
    ax.set_ylim(0,1)
    _style_day_axis(ax,title)
    #reverse legend order
    handles, labls = ax.get_legend_handles_labels()
    ax.legend(handles[::-1], labls[::-1], bbox_to_anchor=(1.25, 1.05),fontsize=9)
    fig.tight_layout()
    fig.savefig(filename)
    plt.show()
    return fig,ax


# In[1178]:


# weekdays
hkoz_dist=minute_distribution(output4b,rrange)


# In[1182]:


fig,ax=plot_activity_lines(hkoz_dist,u"Egy átlagos erdélyi hétköznap rutinja",'2a.png')
# same plot without sleep (0) and work (3), which dominate the scale
fig,ax=plot_activity_lines(hkoz_dist,u"Egy átlagos erdélyi hétköznap rutinja\n(munka és szundi kivételével)",
                           '2b.png',exclude=[0,3])


# In[1183]:


hkoz_dist.to_csv('hkoz2.csv')


# In[1184]:


fig,ax=plot_activity_stack(hkoz_dist,u"Egy átlagos erdélyi hétköznap rutinja",'3.png')


# In[1185]:


# weekends
hetv_dist=minute_distribution(output5b,rrange)


# In[1186]:


fig,ax=plot_activity_lines(hetv_dist,u"Egy átlagos erdélyi hétvége rutinja",'4a.png')
# same plot without sleep (0)
fig,ax=plot_activity_lines(hetv_dist,u"Egy átlagos erdélyi hétvége rutinja\n(szundi kivételével)",
                           '4b.png',exclude=[0])


# In[1187]:


hetv_dist.to_csv('hetv2.csv')


# In[1188]:


fig,ax=plot_activity_stack(hetv_dist,u"Egy átlagos erdélyi hétvége rutinja",'5.png')


# In[1189]:


# NOTE(review): nothing in the cells below reads this column (the counts use
# the 'index' column), but it becomes the last column of `desc`.
desc['one']=1


# In[1190]:


desc.head()


# In[1191]:


def plot_share_pie(column, title, filename, startangle):
    """Pie chart of respondent counts per value of `desc[column]`.

    Row 0 of `desc` is skipped. The pie is drawn once only to obtain
    matplotlib's formatted percentage texts, then cleared and redrawn with
    'name + percentage' labels. The last two labels are put on one line as
    'name (pct)', the others on two lines. The chart is saved to `filename`
    and shown. Returns (fig, ax).
    """
    counts=desc[1:].groupby(column).count()
    wedges,texts,autotexts=plt.pie(counts['index'],labels=counts.index,autopct='%1.0f%%')
    # A local name is used here so the module-level `labels` dict of the
    # activity plots is no longer overwritten.
    pie_labels=[name.get_text()+'\n'+pct.get_text() for name,pct in zip(texts,autotexts)]
    # Each of the last two entries is rewritten exactly once, so a
    # single-category pie no longer gets a doubled ')'.
    for k in range(max(len(pie_labels)-2,0),len(pie_labels)):
        pie_labels[k]=pie_labels[k].replace('\n',' (')+')'
    plt.clf()
    colors1=plt.cm.viridis(np.linspace(0.2, 0.9, len(wedges)))
    plt.pie(counts['index'],labels=pie_labels,colors=colors1,startangle=startangle)
    ax=plt.gca()
    ax.set_aspect('equal')
    plt.title(title)
    # Lay out the figure being drawn; the original called tight_layout on the
    # `fig` left over from an earlier cell.
    fig=plt.gcf()
    fig.tight_layout()
    plt.savefig(filename)
    plt.show()
    return fig,ax


fig,ax=plot_share_pie(desc.columns[2],u'Válaszadók életkor szerinti eloszlása','6.png',5)


# In[1192]:


fig,ax=plot_share_pie(desc.columns[3],u'Válaszadók nem szerinti eloszlása','7.png',25)


# In[1193]:


fig,ax=plot_share_pie(desc.columns[4],u'Válaszadók lakhely szerinti eloszlása','8.png',30)


# In[780]:


# Distinct values of the last column of `desc`.
# NOTE(review): an earlier cell adds desc['one'], so in a top-to-bottom run the
# last column is 'one'; this cell's execution count (780) suggests it was run
# before that column existed. Confirm which column is meant.
# print(...) with a single argument behaves the same on Python 2 and 3.
for value in set(desc[desc.columns[-1]].values):
    print(value)


# In[777]:


# Distinct values of the second column of `desc`.
for value in set(desc[desc.columns[1]].values):
    print(value)