Ez a munkafüzet az I. SZÉKELYDATA adatvizualizációs pályázat keretén belül publikált adatsort dolgozza fel, tisztítja, és készíti elő a későbbi vizualizáláshoz, amely [ezen az oldalon](https://lorinczszabolcs.github.io/eletpalya/) tekinthető meg.
Több lépésben végezzük az elemzést:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import json
# unidecode - ékezetek eltűntetése
import unidecode
# stílus a megjelenítéshez
plt.style.use('ggplot')
# varosok lakossaga
lakossag = {'Kolozsvár': 324576, 'Kovászna': 10114,
'Marosvásárhely': 134290, 'Csíkszereda' : 38966,
'Szentgyörgy': 56006, 'Gyergyó': 18377,
'Kézdivásárhely': 18491, 'Székelyudvarhely': 32935,
'Székelykeresztúr': 9491, 'Szászrégen': 33281,
'Maroshévíz': 13929, 'Segesvár': 28102,
'Barót': 8672, 'Szentegyházasfalu': 6898,
'Balánbánya': 6115, 'Szováta': 10385,
'Parajd': 3448}
filename = '../data/data.json'
df = pd.read_json(filename)
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 49220 entries, 0 to 49219 Data columns (total 19 columns): birth 49220 non-null object id 49220 non-null int64 livesin 43615 non-null object marriedsince 4897 non-null object marriedto 7986 non-null object name 49220 non-null object other1 8440 non-null object other2 307 non-null object other3 1 non-null object other4 1 non-null object other5 1 non-null object other6 1 non-null object school 133 non-null object schoolyear 1487 non-null object studwhat 3805 non-null object studwhere 22175 non-null object studyear 5473 non-null object workwhat 9572 non-null object workwhere 25718 non-null object dtypes: int64(1), object(18) memory usage: 7.1+ MB
Úgy tűnik, elég sok hiányzó adattal van dolgunk. Ezeket a későbbiekben ki kell szűrnünk.
fig = plt.figure(figsize=(5,5));
stud_class=df['studwhat'].value_counts()
ax = stud_class[stud_class > 10].plot.pie(autopct='%.1f')
ax.axis('equal');
titledict = {'fontsize': 15, 'fontweight' : 100, 'verticalalignment': 'baseline', 'horizontalalignment': 'center'};
ax.set_title('Tanulmányi területek (>10 ember)', fontdict=titledict, y = 1.08);
ax.set_ylabel('');
print('Tanulmányi területek száma: ', df['studwhat'].value_counts().size)
Tanulmányi területek száma: 1781
fig = plt.figure(figsize=(5,5));
work_class=df['workwhat'].value_counts()
ax = work_class[work_class > 10].plot.pie(autopct='%.1f')
ax.axis('equal');
titledict = {'fontsize': 15, 'fontweight' : 100, 'verticalalignment': 'baseline', 'horizontalalignment': 'center'};
ax.set_title('Szakterületek (>10 ember)', fontdict=titledict, y = 1.08);
ax.set_ylabel('');
print('Szakterületek száma: ', df['workwhat'].value_counts().size)
Szakterületek száma: 4597
fig = plt.figure(figsize=(5,5));
live_class=df['livesin'].value_counts()
ax = live_class[live_class > 15].plot.pie(autopct='%.1f')
ax.axis('equal');
titledict = {'fontsize': 15, 'fontweight' : 100, 'verticalalignment': 'baseline', 'horizontalalignment': 'center'};
ax.set_title('Lakhelyek (>15 ember)', fontdict=titledict, y = 1.08);
ax.set_ylabel('');
print('Lakhelyek száma: ', df['livesin'].value_counts().size)
Lakhelyek száma: 3600
Átnevezzük a születési helyeket, majd tisztítani és csoportosítani kell a tanulmányi területeket, munkahelyeket, és lakhelyeket, mivel több nyelven, helyesírási hibával, vagy anélkül is szerepelnek adatok.
# Raw birthplace keys used in the data set -> display names (Hungarian).
_BIRTH_LABELS = {
    'balan': 'Balánbánya',
    'barot': 'Barót',
    'csikszereda': 'Csíkszereda',
    'gyergyo': 'Gyergyó',
    'keresztur': 'Székelykeresztúr',
    'kezdi': 'Kézdivásárhely',
    'kolozsvar': 'Kolozsvár',
    'kovaszna': 'Kovászna',
    'parajd': 'Parajd',
    'regen': 'Szászrégen',
    'segesvar': 'Segesvár',
    'szentgyorgy': 'Szentgyörgy',
    'szovata': 'Szováta',
    'toplita': 'Maroshévíz',
    'udvarhely': 'Székelyudvarhely',
    'vasarhely': 'Marosvásárhely',
    'vlahica': 'Szentegyházasfalu',
}


def birth(b):
    """
    Translate a raw birthplace key into its display name.

    Known keys (e.g. 'kezdi') are replaced by the full town name; any other
    value, including non-string values, is returned unchanged.
    """
    if isinstance(b, str):
        return _BIRTH_LABELS.get(b, b)
    return b
# Replace the raw birthplace keys by display names.
# Series.map is used because DataFrame.applymap is deprecated in recent pandas.
df['birth'] = df['birth'].map(birth)
print(df['birth'].value_counts())
Kolozsvár 3942 Kovászna 3915 Marosvásárhely 3823 Csíkszereda 3508 Szentgyörgy 3494 Gyergyó 3461 Kézdivásárhely 3384 Székelyudvarhely 3310 Székelykeresztúr 2830 Szászrégen 2770 Maroshévíz 2564 Segesvár 2539 Barót 2335 Szentegyházasfalu 2329 Balánbánya 2312 Szováta 1522 Parajd 1182 Name: birth, dtype: int64
# Strip accents and lowercase the study field so the keyword matching in
# stud() does not depend on spelling variants; missing values stay NaN.
# Series.map is used because DataFrame.applymap is deprecated in recent pandas.
df['studwhat'] = df['studwhat'].map(
    lambda x: unidecode.unidecode(x).lower() if pd.notna(x) else np.nan
)
# Keywords that are too short to be matched as substrings: 'it' would match
# "political", "literature", "contabilitate"; 'hr' would match "anthropology";
# 'pr' would match "programming". They only count as whole words.
_STUD_WHOLE_WORD = frozenset({'it', 'hr', 'pr'})

# Ordered (category, keywords) table. Order matters: the first category with
# a matching keyword wins, exactly like an if/elif chain.
_STUD_CATEGORIES = (
    # Engineering / technology
    ('Mérnöki/Technológia', (
        'engineer', 'inginer', 'tcm', 'mernok', 'archite', 'a.c.',
        'urbanism', 'industr', 'constructii', 'ccia', 'ing.', 'cepa',
        'robot', 'instalatii', 'securitatea rutiera', 'upb', 'utc-n',
        'automotive', 'mecha', 'cad', 'electro', 'hardware', 'mechanics',
        'autovehicule rutiere', 'mecanica', 'mehanika',
    )),
    # Mathematics / computer science
    ('Matematika/Informatika', (
        'computer', 'software', 'intelligent systems', 'szamitas',
        'calculatoare', 'info', 'cig', 'database', 'mate', 'metematika',
        'szoft', 'it', 'math',
    )),
    # Natural sciences
    ('Természettudományok', (
        'biol', 'geo', 'astronomy', 'okologia', 'kornyezettudomany',
        'foldrajz', 'meteorology', 'meteorologia', 'chemistry', 'fizika',
        'environmental science', 'stiinte ale naturi', 'topography',
        'physics', 'chimie', 'kemia', 'natural science',
    )),
    # Medicine / pharmacy / health
    ('Egészségügy', (
        'medic', 'nurs', 'pharmacy', 'etszk', 'pharmac', 'beteg', 'kineto',
        'balneo', 'apolo', 'egeszseg', 'dentistry', 'fogtechnika', 'health',
        'fogasz', 'dent', 'aok', 'nutrition & dietetics',
        'sykepleie bachelor', 'orvos', 'gyogy', 'kinetoterapie',
    )),
    # Finance / management / business
    ('Pénzügy/Management/Üzlet', (
        'management', 'marketing', 'finan', 'bussiness', 'kereskedelem',
        'manag', 'konyvel', 'human resource', 'hr', 'business',
        'contabilitate', 'administratie', 'fsega', 'acounting', 'bank',
        'account', 'betriebswirtschaft', 'afaceri',
    )),
    # Social sciences
    ('Társadalomtudományok', (
        'sociology', 'econom', 'social science', 'psycho', 'pszicho',
        'psziho', 'psiho', 'mental', 'gazdalkodas', 'kozgazdasz',
        'gazdasag', 'tarsadalmi', 'law', 'cultural', 'juridice',
        'tortenelem', 'jog', 'drept', 'ecology', 'touris', 'social',
        'turis', 'turiz', 'international relations',
        'nemzetkozi kapcsolatok', 'african studies', 'politic',
        'kozigazgatas', 'archaeology', 'public administration', 'r.i.s.e',
        'studii europene', 'nemzetkozi tanulmanyok', 'european studies',
        'polito', 'public policy', 'socio', 'szocio',
    )),
    # Education
    ('Oktatás', (
        'peda', 'teaching', 'tanar', 'andragog',
        'alternativ modszerek az ovodaban es az elemi osztalyokban',
        'neveles', 'd.p.p.d', 'stiinte ale educatiei', 'educatie fizica',
        'ed.fiz', 'bodybuilding', 'physical', 'volei', 'testnevel',
        'education',
    )),
    # Arts
    ('Művészetek', (
        'photography', 'photo', 'acting', 'operator', 'saxophone',
        'graphics', 'art', 'canto popular', 'actor', 'muveszet',
        'choreography', 'painting', 'music', 'desiner', 'szobraszat',
        'puppery', 'muzica', 'piano', 'violin', 'sing', 'zene', 'drama',
        'bass', 'szinesz', 'percussion', 'puppetry', 'dance', 'sculpture',
        'szinmuvesz', 'kameraman', 'enek',
    )),
    # Media / communication
    ('Média/Kommunikáció', (
        'journalism', 'ujsag', 'design', 'advertising', 'com', 'film',
        'public relations', 'pr', 'newspaper', 'media', 'komm',
    )),
    # Humanities
    ('Humán tárgyak', (
        'roman', 'engleza', 'english', 'german', 'literature',
        'translation', 'angol', 'language', 'limbi', 'hungarian',
        'linguistics', 'japanese', 'bolcsesz', 'magyar', 'aesthetics',
        'deutsch', 'french', 'history', 'philosophy', 'bolcseszkar',
        'filozofia', 'katolikus', 'phylosophy', 'philology', 'filologia',
        'filozofie', 'religi', 'missions & evangelism', 'theology',
        'vallas',
    )),
    # Trades / applied sciences
    ('Alkalmazott tudományok', (
        'asztalos', 'hivatasos', 'ospatar', 'auto electrician', 'pincer',
        'agriculture', 'vezetes es szervezes', 'szakacs', 'foci',
        'kozmetikus', 'hairdressing', 'coafura', 'kozmetika', 'optometry',
        'fodrasz', 'sport', 'cosmetology', 'lacatus', 'lacatuserie',
        'lakatos', 'gepesz', 'autoszerelo', 'agro', 'horticulture',
        'silvicultura', 'silviculture', 'faipar', 'optometria',
    )),
)


def stud(s):
    """
    Categorise the field of study 's' into one of the main categories.

    's' is expected to be a lowercase, accent-free string. Categories are
    tried in the order of _STUD_CATEGORIES and the first one with a matching
    keyword is returned. Keywords are matched as substrings, except the
    two-letter abbreviations in _STUD_WHOLE_WORD, which must appear as a
    separate word so that they do not fire inside unrelated words.

    Returns the category label, or 's' itself when no keyword matches.
    """
    # Split on every non-alphanumeric character to get the whole words.
    words = set(''.join(ch if ch.isalnum() else ' ' for ch in s).split())
    for label, keywords in _STUD_CATEGORIES:
        for keyword in keywords:
            if keyword in _STUD_WHOLE_WORD:
                matched = keyword in words
            else:
                matched = keyword in s
            if matched:
                return label
    return s
# Replace every free-text study field by its main category (NaN stays NaN).
# Series.map is used because DataFrame.applymap is deprecated in recent pandas.
df['studwhat'] = df['studwhat'].map(lambda x: stud(x) if pd.notna(x) else np.nan)
stud_class = df['studwhat'].value_counts()
# Only categories with more than 15 people are plotted and counted.
frequent_stud_class = stud_class[stud_class > 15]

fig = plt.figure(figsize=(5, 5))
ax = frequent_stud_class.plot.pie(autopct='%.1f')
ax.axis('equal')
titledict = {'fontsize': 15, 'fontweight': 100, 'verticalalignment': 'baseline', 'horizontalalignment': 'center'}
ax.set_title('Tanulmányi területek (>15 ember)', fontdict=titledict, y=1.08)
ax.set_ylabel('')
# The number of categories is the length of the filtered Series; calling
# value_counts() on it again would count distinct frequencies instead.
print('Tanulmányi területek száma: ', frequent_stud_class.size)
Tanulmányi területek száma: 11
# Strip accents and lowercase the job field before keyword matching;
# missing values stay NaN.
# Series.map is used because DataFrame.applymap is deprecated in recent pandas.
df['workwhat'] = df['workwhat'].map(
    lambda x: unidecode.unidecode(x).lower() if pd.notna(x) else np.nan
)
df['workwhat'].value_counts()
owner 154 manager 119 administrator 108 engineer 65 ovono 65 tanar 55 tanitono 49 photographer 47 operator 47 bartender 44 software engineer 41 teacher 40 director 36 software developer 36 munkas 32 tulajdonos 31 sofor 29 sofer 29 graphic designer 28 volunteering 27 asistent medical generalist 26 volunteer 26 pincer 26 project manager 26 web developer 25 consultant 25 fonok 25 general manager 24 truck driver 23 professor 22 ... tulajdonos, vezeto 1 read the diary of ellen rimbauer: my life 1 music teacher 1 szerkeszto-musorvezeto-riporter-stb. 1 ujsagiro, szerkeszto 1 spartan warrior 1 realizator cinema l!ve si jurnalul de noapte 1 dtp/graphic designer 1 vice president marketing 1 consultant fiscal 1 former nursing instructor 1 urology resident 1 owern's 1 semmi xd 1 associate field engineer 1 the university of north carolina 1 former kos karoly 1 managing partner/administrator 1 gestione appalti 1 kepzelges 1 former los angeles california 1 phd 1 contact center agent 1 consilier pentru afaceri europene 1 bisztonsagi or 1 sucursala cluj-napoca 1 scolarita 1 epitkezes 1 makeup artist 1 teszter 1 Name: workwhat, Length: 4384, dtype: int64
# Keywords that must equal the whole job title instead of being a substring
# ('vezeto' alone means driver here, while e.g. 'ugyvezeto' is a manager).
_WORK_EXACT = frozenset({'vezeto'})

# Ordered (category, keywords) table. Order matters: the first category with
# a matching keyword wins, exactly like an if/elif chain.
_WORK_CATEGORIES = (
    # IT
    ('Informatikus', (
        'software', 'developer', 'informa', 'asm', 'sysop', 'it compliance',
        'programmer', 'web',
    )),
    # Education (includes students and researchers)
    ('Oktatás', (
        'tanitono', 'tanar', 'ovono', 'teacher', 'dirigente', 'pedagog',
        'student', 'tanulo', 'invatatoare', 'tanito', 'sudent', 'elev',
        'muvelodesszervezo', 'oktato', 'teaching', 'study', 'diak',
        'ovoneni', 'profesor', 'lecturer', 'instructor', 'university',
        'adjunktus', 'educatoare', 'researcher', 'professor',
    )),
    # Logistics
    ('Logisztika', (
        'sofer', 'driver', 'curier', 'posta', 'gepkocsivezeto', 'raktar',
        'vezeto', 'komissio', 'messenger', 'komisszio', 'comission',
        'fahrer', 'transport', 'rider', 'sofor',
    )),
    # Volunteering
    ('Önkéntes', (
        'volunteer', 'au pair', 'voluntar', 'social work', 'onkentes',
    )),
    # Artists
    ('Művész', (
        'tancos', 'szinesz', 'enekes', 'drummer', 'artist', 'dj', 'music',
        'guitar', 'muvesz', 'musician', 'bassist', 'zenesz', 'karmester',
        'actress', 'animator', 'actor', 'dance', 'grafik', 'sing',
    )),
    # Sales
    ('Eladás', (
        'sales', 'consultant', 'uzlet', 'marketing', 'cashier', 'casier',
        'ugynok', 'agent', 'broker', 'vanzari', 'vanzato', 'elado',
    )),
    # Hospitality
    ('Vendéglátás', (
        'bartender', 'waitress', 'pultos', 'pincer', 'reception',
        'kiszolgalo', 'ospatar', 'barista', 'bucatar', 'waiter',
        'waiting staff', 'baros', 'cukrasz', 'sef', 'szakacs', 'chef',
        'barman',
    )),
    # Healthcare
    ('Egészségügy', (
        'asistent', 'orvos', 'aszisztens', 'gyogyszer', 'psiholog',
        'health', 'apolo', 'veterinarian', 'kranken', 'assistenz', 'nurse',
        'assziszt', 'orthodontist', 'dietitian', 'physician', 'dr.',
        'rezidens', 'kineto', 'dentist', 'pszicho', 'rezident', 'doctor',
        'pharma', 'optometrista', 'farmac', 'medic',
    )),
    # Cosmetics / hairdressing
    ('Kozmetika/Fodrász', (
        'kozmetik', 'fodrasz', 'kormos', 'manichiur', 'coaf', 'hair',
    )),
    # Sport
    ('Sport', (
        'train', 'torna', 'versenyzo', 'jucator', 'forward', 'vego',
        'beallo', 'kozeppalyas', 'jatekos', 'coach', 'striker', 'arbitru',
        'midfielder', 'sport', 'captain', 'player', 'fotbal', 'atacant',
        'csatar', 'goalkeeper', 'fitness', 'antrenor',
    )),
    # Office employees
    ('Irodai alkalmazott', (
        'secretary', 'alkalmazott', 'iroda', 'oordinator', 'economist',
        'titkarno', 'coordonator', 'public relations', 'contabil',
        'konyvtaros', 'kozgazdasz', 'referens', 'secretara',
        'technical support', 'dispecer', 'titkar', 'inspector', 'referent',
        'partner', 'employee', 'accountant', 'konyvelo',
    )),
    # Engineers / architects
    ('Mérnök', (
        'engineer', 'inginer', 'mernok', 'epitesz', 'constructor',
        'arhitect', 'architect', 'arhchitect', 'autocad',
    )),
    # Public servants
    ('Közalkalmazott', (
        'polgarmester', 'president', 'tanacsos', 'orsvezeto', 'presedinte',
    )),
    # Media
    ('Média', (
        'photo', 'media', 'designer', 'operator', 'foto', 'writer', 'model',
        'musorvezeto', 'press', 'reporter', 'editor', 'szerkeszto',
        'ujsagiro', 'producer', 'blogger', 'journalist', 'martketing',
    )),
    # Legal
    ('Jogi', (
        'lawyer', 'ugyved', 'juridic', 'avocat',
    )),
    # Skilled / manual workers. 'masseur' belongs here next to its Hungarian
    # spelling 'masszor'; it used to be listed under owners/managers.
    ('Szakmunkás', (
        'varrono', 'munkas', 'muncitor', 'electrician', 'villanyszerelo',
        'trabajador', 'kazan', 'worker', 'mindenes', 'kasszas', 'gepkezelo',
        'flight attendant', 'lucrator comercial', 'szerelo', 'baker',
        'muncitoare', 'cabin crew', 'elarusito', 'technician', 'pilot',
        'masszor', 'masseur', 'pastor', 'mechanic', 'firefighter',
        'mecanic', 'hegeszto', 'fafarago', 'asztalos',
    )),
    # Owners / managers. 'president' and 'presedinte' are not repeated here:
    # they are always caught by the public-servant category above.
    ('Tulajdonos/Manager', (
        'boss', 'owner', 'manager', 'ugyvezeto', 'admin', 'igazgato',
        'vallalkozo', 'chief', 'cegvezeto', 'leader', 'ceo', 'supervisor',
        'director', 'tulajdonos', 'menedzser', 'elnok', 'fonok',
        'management', 'founder',
    )),
    # Church
    ('Egyház', (
        'pap', 'popa', 'lelkesz', 'priest', 'kantor', 'lelkipasztor',
    )),
)


def work(w):
    """
    Categorise the job title 'w' into one of the main categories.

    'w' is expected to be a lowercase, accent-free string. Categories are
    tried in the order of _WORK_CATEGORIES and the first one with a matching
    keyword is returned. Keywords are matched as substrings, except those in
    _WORK_EXACT, which must equal the whole title.

    Returns the category label, or NaN when no keyword matches, so that
    unclassified titles can be dropped later.
    """
    for label, keywords in _WORK_CATEGORIES:
        for keyword in keywords:
            if keyword in _WORK_EXACT:
                matched = keyword == w
            else:
                matched = keyword in w
            if matched:
                return label
    return np.nan
# Replace every free-text job title by its main category (NaN stays NaN).
# Series.map is used because DataFrame.applymap is deprecated in recent pandas.
df['workwhat'] = df['workwhat'].map(lambda x: work(x) if pd.notna(x) else np.nan)
work_class = df['workwhat'].value_counts()

# Pie chart of the categories with more than 15 people.
fig = plt.figure(figsize=(5, 5))
ax = work_class[work_class > 15].plot.pie(autopct='%.1f')
ax.axis('equal')
titledict = {'fontsize': 15, 'fontweight': 100, 'verticalalignment': 'baseline', 'horizontalalignment': 'center'}
ax.set_title('Szakterületek (>15 ember)', fontdict=titledict, y=1.08)
ax.set_ylabel('');
# Strip accents and lowercase the residence field before keyword matching;
# missing values stay NaN.
# Series.map is used because DataFrame.applymap is deprecated in recent pandas.
df['livesin'] = df['livesin'].map(
    lambda x: unidecode.unidecode(x).lower() if pd.notna(x) else np.nan
)
df['livesin'].value_counts()
cluj-napoca 5000 sepsiszentgyorgy 2575 targu-mures 2442 csikszereda 1846 szekelyudvarhely 1818 kezdivasarhely, covasna, romania 1679 budapest, hungary 1548 szekelykeresztur, harghita, romania 1186 szentegyhazasfalu 1106 sighisoara, romania 1069 gyergyoszentmiklos 973 marosvasarhely, mures, romania 944 reghin 933 parajd 831 toplita 810 sovata 767 barot, covasna, romania 709 bucharest, romania 681 balan, romania 556 london, united kingdom 408 brasov, romania 378 toplita-ciuc, harghita, romania 312 reghinul, mures, romania 292 sfantu-gheorghe, covasna, romania 280 vlahita, harghita, romania 245 szovata, mures, romania 210 miercurea-ciuc, harghita, romania 192 rome, italy 146 gyergyoalfalu, harghita, romania 139 baraolt 134 ... skegness 1 molenstede, brabant, belgium 1 raiano 1 charleroi 1 saint-christol, provence-alpes-cote d'azur, france 1 torokbecse 1 racosul-de-sus, harghita, romania 1 forest row 1 goolwa, south australia 1 montalbano di elicona, sicilia, italy 1 heraklion, greece 1 cattolica, emilia-romagna, italy 1 lech, vorarlberg, austria 1 fakenham, norfolk 1 kaszopuszta, somogy, hungary 1 grintiesu mic, neamt, romania 1 pietrastornina 1 koroshegy 1 de haan, belgium 1 furstenfeldbruck, germany 1 vilvorde, brabant, belgium 1 egerag, baranya, hungary 1 mikepercs 1 csorna 1 lazaresti, harghita, romania 1 peiraeus, attiki, greece 1 kirchdorf in tirol, tirol, austria 1 paralim, famagusta, cyprus 1 vaitape, french polynesia 1 auboue, lorraine, france 1 Name: livesin, Length: 3600, dtype: int64
# Short keywords that only count as whole words. As substrings they fire
# inside unrelated names: 'roma' in every "..., romania", 'erd' in
# "amsterdam", 'becs' in "torokbecse", 'eger' in "egerag", and so on.
_LIVES_WHOLE_WORD = frozenset({
    'arad', 'becs', 'iasi', 'roma', 'rome', 'eger', 'deva', 'erd', 'koln',
    'pecs',
})

# Keywords that must equal the city part (the text before the first comma).
# 'covasna' is also a county name, so "zabala, covasna, romania" must not
# match, while "covasna" and "covasna, romania" must.
_LIVES_CITY_ONLY = frozenset({'covasna', 'kovaszna'})

# Ordered (display name, keywords) table. Order matters: the first entry with
# a matching keyword wins, exactly like an if/elif chain.
_LIVES_PLACES = (
    ('Csíkszereda', ('miercurea-ciuc', 'miercurea ciuc', 'csikszereda')),
    ('Székelyudvarhely', ('szekelyudvarhely', 'odorheiul secuiesc',
                          'odorheiu secuiesc')),
    ('Gyergyó', ('gheorgheni', 'gyergyo')),
    ('Marosvásárhely', ('targu-mures', 'tirgu-mures', 'marosvasarhely')),
    ('Sepsiszentgyörgy', ('sepsiszentgyorgy', 'sfantu-gheorghe',
                          'sfantu gheorghe')),
    ('Segesvár', ('sighisoara', 'segesvar')),
    ('Kolozsvár', ('cluj-napoca', 'kolozsvar')),
    ('Kézdivásárhely', ('kezdivasarhely', 'targu secuiesc')),
    ('Nagyvárad', ('oradea', 'nagyvarad')),
    ('Temesvár', ('timisoara', 'temesvar')),
    ('Brassó', ('brasov', 'brasso')),
    ('Bukarest', ('bucharest', 'bukarest', 'bucuresti')),
    ('Budapest', ('budapest',)),
    ('Szászrégen', ('reghin', 'regen')),
    ('Székelykeresztúr', ('keresztur', 'cristuru')),
    ('Toplița', ('toplita', 'toplica')),
    ('Szentegyházasfalu', ('szentegyhaza', 'vlahita')),
    ('London', ('london',)),
    ('Szováta', ('sovata', 'szovata')),
    ('Barót', ('barot', 'baraolt')),
    ('Balánbánya', ('balan',)),
    ('Parajd', ('parajd', 'praid')),
    ('Arad', ('arad',)),
    ('Bécs', ('vienna', 'becs')),
    ('Dublin', ('dublin',)),
    ('Tusnád', ('tusnad',)),
    ('Nagyszeben', ('sibiu', 'szeben')),
    ('Gyulafehérvár', ('gyulafehervar', 'alba iulia')),
    ('Jászvásár', ('iasi', 'jaszvasar')),
    ('München', ('munich', 'munchen')),
    ('Róma', ('roma', 'rome')),
    ('New York', ('new york',)),
    ('Torja', ('turia', 'torja')),
    ('Berlin', ('berlin',)),
    ('Debrecen', ('debrecen',)),
    ('Sopron', ('sopron',)),
    ('Szeged', ('szeged',)),
    ('Ozsdola', ('ozsdola', 'ojdula')),
    ('Dumbrăveni', ('dumbraveni',)),
    ('Vecsés', ('vecses',)),
    ('Kaposvár', ('kaposvar',)),
    ('Mosonmagyaróvár', ('mosonmagyarovar',)),
    ('Eger', ('eger',)),
    ('Gelence', ('gelence', 'ghelinta')),
    ('Borszék', ('borsec', 'borszek')),
    ('Galați', ('galati', 'galac')),
    ('Déva', ('deva',)),
    ('Lajosmizse', ('lajosmizse',)),
    ('Érd', ('erd',)),
    ('Toronto', ('toronto',)),
    ('Veszprém', ('veszprem',)),
    ('Köln', ('cologne', 'koln')),
    ('Pusztavám', ('pusztavam',)),
    ('Bodzaforduló', ('intorsura buzaului', 'bodzafordulo')),
    ('Beszterce', ('bistrita', 'beszterce')),
    ('Târnăveni', ('tarnaveni', 'dicsoszentmarton')),
    ('Szárhegy', ('lazarea', 'szarhegy')),
    ('Kolding', ('kolding',)),
    ('Passau', ('passau',)),
    ('Ingolstadt', ('ingolstadt',)),
    ('Kovászna', ('covasna', 'kovaszna')),
    ('Szatmár', ('satu mare', 'szatmar')),
    ('Atlanta', ('atlanta',)),
    ('Pécs', ('pecs',)),
    ('Korinth', ('korinth',)),
    ('Kerepes', ('kerepes',)),
    ('Nyíregyháza', ('nyiregyhaza',)),
    ('Gidófalva', ('gidofalva', 'ghidfalau')),
    ('Piatra Neamț', ('piatra neamt',)),
)


def lives(l):
    """
    Map the free-text place of residence 'l' to a canonical town name.

    'l' is expected to be a lowercase, accent-free string such as
    'sfantu-gheorghe, covasna, romania'. Places are tried in the order of
    _LIVES_PLACES and the first one with a matching keyword is returned.
    Keywords are matched as substrings, except:
      * keywords in _LIVES_WHOLE_WORD must appear as a separate word;
      * keywords in _LIVES_CITY_ONLY must equal the text before the first
        comma.

    Returns the canonical name, or 'l' itself when nothing matches.

    NOTE(review): the names returned here for Sepsiszentgyörgy and Toplița
    differ from the ones birth() uses for the same towns ('Szentgyörgy',
    'Maroshévíz') - confirm whether the two columns are meant to share labels.
    """
    # Whole words: split on every non-alphanumeric character.
    words = set(''.join(ch if ch.isalnum() else ' ' for ch in l).split())
    city = l.split(',')[0].strip()
    for label, keywords in _LIVES_PLACES:
        for keyword in keywords:
            if keyword in _LIVES_CITY_ONLY:
                matched = keyword == city
            elif keyword in _LIVES_WHOLE_WORD:
                matched = keyword in words
            else:
                matched = keyword in l
            if matched:
                return label
    return l
# Replace every free-text residence by its canonical town name (NaN stays NaN).
# Series.map is used because DataFrame.applymap is deprecated in recent pandas.
df['livesin'] = df['livesin'].map(lambda x: lives(x) if pd.notna(x) else np.nan)
live_class = df['livesin'].value_counts()
# Only towns with more than 100 people are plotted and counted.
frequent_live_class = live_class[live_class > 100]

fig = plt.figure(figsize=(5, 5))
ax = frequent_live_class.plot.pie(autopct='%.1f')
ax.axis('equal')
titledict = {'fontsize': 15, 'fontweight': 100, 'verticalalignment': 'baseline', 'horizontalalignment': 'center'}
ax.set_title('Lakhelyek (>100 ember)', fontdict=titledict, y=1.08)
ax.set_ylabel('')
print('Lakhelyek száma: ', frequent_live_class.size)
Lakhelyek száma: 24
Előbb kiválasztjuk a megfelelő oszlopokat, majd töröljük az összes olyan mérést, mely nincs kitöltve (NaN), valamint töröljük az ékezeteket és kisbetűsre alakítjuk a DataFrame tartalmát, hogy könnyebb legyen később a feldolgozás.
df_birth_stud=df.loc[:, ['birth', 'studwhat']].copy()
df_birth_stud.dropna(inplace=True)
df_birth_stud = df_birth_stud.groupby('studwhat').filter(lambda x : len (x) > 15)
Adatok mennyisége a népességhez képest, városonként (város, a népesség százaléka, személyek száma):
# Per birthplace: [town, share of the town's population in percent, head count].
# value_counts() is computed once and iterated by label; indexing it with an
# integer position would rely on a deprecated fallback of label-indexed Series.
birth_stud_counts = df_birth_stud.iloc[:, 0].value_counts()
[[town, 100 * count / lakossag[town], count] for town, count in birth_stud_counts.items()]
[['Kolozsvár', 0.12847530316474415, 417], ['Marosvásárhely', 0.2993521483356914, 402], ['Kovászna', 3.6088590073165907, 365], ['Szentgyörgy', 0.4928043423918866, 276], ['Csíkszereda', 0.6723810501462814, 262], ['Székelyudvarhely', 0.7347806285107029, 242], ['Gyergyó', 1.2243565326222996, 225], ['Szászrégen', 0.6520236771731619, 217], ['Segesvár', 0.7437193082342893, 209], ['Kézdivásárhely', 1.1248715591368774, 208], ['Maroshévíz', 1.026635077895039, 143], ['Székelykeresztúr', 1.190601622589822, 113], ['Balánbánya', 1.3409648405560097, 82], ['Barót', 0.8994464944649446, 78], ['Szentegyházasfalu', 1.0727747173093651, 74], ['Szováta', 0.587385652383245, 61], ['Parajd', 0.8700696055684455, 30]]
heatmap = pd.crosstab(df_birth_stud.birth, df_birth_stud.studwhat)
A lenti hőtérképről könnyen leolvasható, hogy egyes városokban született emberek esetén mely tanulmányi területek a legnépszerűbbek.
További elemzés a weboldalon.
fig, ax = plt.subplots(figsize=(15,10))
ax = sns.heatmap(heatmap, annot=True, cbar=False, fmt='g', annot_kws={"size": 14})
ax.set_title('Születési hely - Tanulmányok', fontsize=24, y = 1.04);
ax.set_xlabel('Tanulmányok', fontsize=18);
ax.set_ylabel('Születési hely', fontsize=18);
plt.setp(ax.get_xticklabels(), rotation='vertical', fontsize=14);
plt.setp(ax.get_yticklabels(), rotation='horizontal', fontsize=14);
df_birth_work=df.loc[:, ['birth', 'workwhat']].copy()
df_birth_work.dropna(inplace=True)
df_birth_work = df_birth_work.groupby('workwhat').filter(lambda x : len (x) >= 10)
Adatok mennyisége százalékban a népességhez képest:
# Per birthplace: [town, share of the town's population in percent].
# value_counts() is computed once and iterated by label; indexing it with an
# integer position would rely on a deprecated fallback of label-indexed Series.
birth_work_counts = df_birth_work.iloc[:, 0].value_counts()
[[town, 100 * count / lakossag[town]] for town, count in birth_work_counts.items()]
[['Kolozsvár', 0.2991595188800158], ['Marosvásárhely', 0.6366818080274034], ['Kovászna', 7.178168874826972], ['Szentgyörgy', 1.015962575438346], ['Csíkszereda', 1.2292768054201098], ['Székelyudvarhely', 1.3390010626992561], ['Szászrégen', 1.2770048976893724], ['Kézdivásárhely', 2.271375263641772], ['Gyergyó', 2.2691407737933287], ['Segesvár', 1.256138353142125], ['Maroshévíz', 2.096345753463996], ['Székelykeresztúr', 2.6130017911705825], ['Barót', 2.363929889298893], ['Balánbánya', 3.3360588716271464], ['Szentegyházasfalu', 2.2615250797332562], ['Szováta', 1.1266249398170438], ['Parajd', 1.7981438515081207]]
heatmap2 = pd.crosstab(df_birth_work.birth, df_birth_work.workwhat)
heatmap2_norm_col=(heatmap2-heatmap2.mean())/heatmap2.std()
fig, ax = plt.subplots(figsize=(15,10))
ax = sns.heatmap(heatmap2, annot=True, cbar=False, fmt='g', annot_kws={"size": 14})
ax.set_title('Születési hely - szakterület', fontsize = 24, y=1.04)
ax.set_xlabel('Szakterület', fontsize=18);
ax.set_ylabel('Születési hely', fontsize=18);
plt.setp(ax.get_xticklabels(), rotation='vertical', fontsize=14);
plt.setp(ax.get_yticklabels(), rotation='horizontal', fontsize=14);
Normalizálva oszlop szerint, mert a manager miatt a többi nem szembetűnő.
fig, ax = plt.subplots(figsize=(15,10))
ax = sns.heatmap(heatmap2_norm_col, annot=True, cbar=False, annot_kws={"size": 14})
ax.set_title('Születési hely - szakterület (oszlop szerint normalizálva)', fontsize = 24, y = 1.04)
ax.set_xlabel('Szakterület', fontsize=18);
ax.set_ylabel('Születési hely', fontsize=18);
plt.setp(ax.get_xticklabels(), rotation='vertical', fontsize=14);
plt.setp(ax.get_yticklabels(), rotation='horizontal', fontsize=14);
df_live_stud=df.loc[:, ['livesin', 'studwhat']].copy()
df_live_stud.dropna(inplace=True)
df_live_stud = df_live_stud.groupby('studwhat').filter(lambda x : len (x) > 15)
df_live_stud = df_live_stud.groupby('livesin').filter(lambda x : len (x) >= 20)
df_live_stud.iloc[:,0].size
2334
heatmap3 = pd.crosstab(df_live_stud.livesin, df_live_stud.studwhat)
heatmap3_norm_row=heatmap3.sub(heatmap3.mean(axis=1), axis=0)
heatmap3_norm_row = heatmap3_norm_row.div(heatmap3.std(axis=1), axis=0 )
fig, ax = plt.subplots(figsize=(15,10))
ax = sns.heatmap(heatmap3, annot=True, cbar=False, fmt='g', annot_kws={"size": 14})
ax.set_title('Lakhely - tanulmányok', fontsize = 24, y =1.04)
ax.set_xlabel('Tanulmányok', fontsize=18);
ax.set_ylabel('Lakhely', fontsize=18);
plt.setp(ax.get_xticklabels(), rotation='vertical', fontsize=14);
plt.setp(ax.get_yticklabels(), rotation='horizontal', fontsize=14);
Normalizálva sor szerint, mert Kolozsvár miatt a többi adat elenyészik.
fig, ax = plt.subplots(figsize=(15,10))
ax = sns.heatmap(heatmap3_norm_row, annot=True, cbar=False, annot_kws={"size": 14})
ax.set_title('Lakhely - tanulmányok (sor szerint normalizálva)', fontsize=24, y=1.04)
ax.set_xlabel('Tanulmányok', fontsize=18);
ax.set_ylabel('Lakhely', fontsize=18);
plt.setp(ax.get_xticklabels(), rotation='vertical', fontsize=14);
plt.setp(ax.get_yticklabels(), rotation='horizontal', fontsize=14);
df_live_work=df.loc[:, ['livesin', 'workwhat']].copy()
df_live_work.dropna(inplace=True)
df_live_work = df_live_work.groupby('livesin').filter(lambda x : len (x) >= 20)
df_live_work = df_live_work.groupby('workwhat').filter(lambda x : len (x) >= 10)
heatmap4 = pd.crosstab(df_live_work.livesin, df_live_work.workwhat)
heatmap4_norm_row=heatmap4.sub(heatmap4.mean(axis=1), axis=0)
heatmap4_norm_row = heatmap4_norm_row.div(heatmap4.std(axis=1), axis=0 )
fig, ax = plt.subplots(figsize=(15,10))
ax = sns.heatmap(heatmap4, annot=True, cbar=False, fmt='g', annot_kws={"size": 14})
ax.set_title('Lakhely - szakterület', fontsize = 24, y=1.04)
ax.set_xlabel('Szakterület', fontsize=18);
ax.set_ylabel('Lakhely', fontsize=18);
plt.setp(ax.get_xticklabels(), rotation='vertical', fontsize=14);
plt.setp(ax.get_yticklabels(), rotation='horizontal', fontsize=14);
fig, ax = plt.subplots(figsize=(15,10))
ax = sns.heatmap(heatmap4_norm_row, annot=True, cbar=False, annot_kws={"size": 14})
ax.set_title('Lakhely - szakterület (sor szerint normalizálva)', fontsize = 24, y=1.04)
ax.set_xlabel('Szakterület', fontsize=18);
ax.set_ylabel('Lakhely', fontsize=18);
plt.setp(ax.get_xticklabels(), rotation='vertical', fontsize=14);
plt.setp(ax.get_yticklabels(), rotation='horizontal', fontsize=14);
# Study field vs. job category: keep rows where both are known.
df_stud_work = df.loc[:, ['studwhat', 'workwhat']].copy()
df_stud_work.dropna(inplace=True)
# Same thresholds as in the other tables: study fields with more than 15
# people (this one used '>= 15' before) and job categories with at least 10.
df_stud_work = df_stud_work.groupby('studwhat').filter(lambda x: len(x) > 15)
df_stud_work = df_stud_work.groupby('workwhat').filter(lambda x: len(x) >= 10)

heatmap5 = pd.crosstab(df_stud_work.studwhat, df_stud_work.workwhat)
# Row-wise standardisation: subtract each row's mean, divide by its std.
heatmap5_norm_row = heatmap5.sub(heatmap5.mean(axis=1), axis=0)
heatmap5_norm_row = heatmap5_norm_row.div(heatmap5.std(axis=1), axis=0)
# Annotated heatmap of the raw study-field / work-field counts.
fig, ax = plt.subplots(figsize=(15, 10))
ax = sns.heatmap(
    heatmap5,
    annot=True,
    cbar=False,
    fmt='g',
    annot_kws={"size": 14},
)
ax.set_title('Tanulmányi terület - szakterület', fontsize=24, y=1.04)
# Both axis captions use the same font size.
for set_caption, caption in ((ax.set_xlabel, 'Szakterület'), (ax.set_ylabel, 'Tanulmányi terület')):
    set_caption(caption, fontsize=18)
# x tick labels are drawn vertically, y tick labels horizontally.
for get_labels, direction in ((ax.get_xticklabels, 'vertical'), (ax.get_yticklabels, 'horizontal')):
    plt.setp(get_labels(), rotation=direction, fontsize=14)
# 11-step palette, dark to white; indexed by column position when the
# bar traces for `studwhat` columns are built.
colors1 = [
    'rgb(4,4,21)',
    'rgb(55,15,79)',
    'rgb(105,27,127)',
    'rgb(148,42,124)',
    'rgb(189,66,117)',
    'rgb(230,108,102)',
    'rgb(251,144,103)',
    'rgb(253,175,121)',
    'rgb(253,203,143)',
    'rgb(252,230,168)',
    'rgb(255,255,255)',
]
# 18-step palette; indexed by column position when the bar traces for
# `workwhat` columns are built.
colors2 = [
    'rgb(4,4,21)',
    'rgb(35,11,56)',
    'rgb(67,17,92)',
    'rgb(98,24,127)',
    'rgb(124,33,125)',
    'rgb(150,43,124)',
    'rgb(176,52,122)',
    'rgb(201,78,113)',
    'rgb(225,103,103)',
    'rgb(250,129,94)',
    'rgb(251,148,105)',
    'rgb(253,166,116)',
    'rgb(254,185,127)',
    'rgb(253, 201, 142)',
    'rgb(253, 218, 157)',
    'rgb(252, 234, 172)',
    'rgb(252, 244, 220)',
    'rgb(252, 255, 255)',
]
# Cross-tabulate `birth` against `studwhat`: raw counts, plus shares
# normalised within each `birth` row.
data_df = pd.crosstab(df_birth_stud.birth, df_birth_stud.studwhat)
data_df_normalized = pd.crosstab(
    df_birth_stud.birth, df_birth_stud.studwhat, normalize='index'
)

# One bar trace per `studwhat` column, keyed by column position.
data = {}
for pos, column in enumerate(data_df.columns):
    data[pos] = {
        'x': np.array(data_df.index).tolist(),
        'y': np.array(data_df.iloc[:, pos].values).tolist(),
        'name': str(column),
        'marker': {'color': colors1[pos]},
        'type': 'bar',
    }
with open('../data/birth_stud.json', 'w') as outfile:
    json.dump(data, outfile, ensure_ascii=True)
# One bar trace per `studwhat` column of the row-normalised table, keyed
# by column position. The traces are collected in `data_normalized`
# (not in the raw-count `data` dict) so the exported file can only ever
# contain normalised values.
data_normalized = {}
for pos, column in enumerate(data_df_normalized.columns):
    data_normalized[pos] = {
        'x': np.array(data_df_normalized.index).tolist(),
        'y': np.array(data_df_normalized.iloc[:, pos].values).tolist(),
        'name': str(column),
        'marker': {'color': colors1[pos]},
        'type': 'bar',
    }
with open('../data/birth_stud_normalized.json', 'w') as outfile:
    json.dump(data_normalized, outfile, ensure_ascii=True)
# heatmap
# Single heatmap trace built from the raw-count table: column labels on
# x, index labels on y, one list of cell values per table row in z.
x = np.array(data_df.columns).tolist()
y = np.array(data_df.index).tolist()
z = [
    np.array(data_df.iloc[row, :].values).tolist()
    for row in range(len(data_df.index))
]
data = {
    0: {
        'x': x,
        'y': y,
        'z': z,
        'type': 'heatmap',
        # [position in 0..1, colour] stops of the colour scale.
        'colorscale': [
            [0, 'rgb(4, 4, 21)'],
            [0.35, 'rgb(98,24,127)'],
            [0.5, 'rgb(176,52,122)'],
            [0.6, 'rgb(250, 129, 94)'],
            [0.7, 'rgb(254,185,127)'],
            [1, 'rgb(252, 234, 172)'],
        ],
    }
}
with open('../data/birth_stud_heatmap.json', 'w') as outfile:
    json.dump(data, outfile, ensure_ascii=True)
# Cross-tabulate `birth` against `workwhat`: raw counts, plus shares
# normalised within each `birth` row.
data_df = pd.crosstab(df_birth_work.birth, df_birth_work.workwhat)
data_df_normalized = pd.crosstab(
    df_birth_work.birth, df_birth_work.workwhat, normalize='index'
)

# One bar trace per `workwhat` column, keyed by column position.
data = {}
for pos, column in enumerate(data_df.columns):
    data[pos] = {
        'x': np.array(data_df.index).tolist(),
        'y': np.array(data_df.iloc[:, pos].values).tolist(),
        'name': str(column),
        'marker': {'color': colors2[pos]},
        'type': 'bar',
    }
with open('../data/birth_work.json', 'w') as outfile:
    json.dump(data, outfile, ensure_ascii=True)
# One bar trace per `workwhat` column of the row-normalised table, keyed
# by column position. The traces are collected in `data_normalized`
# (not in the raw-count `data` dict) so the exported file can only ever
# contain normalised values.
data_normalized = {}
for pos, column in enumerate(data_df_normalized.columns):
    data_normalized[pos] = {
        'x': np.array(data_df_normalized.index).tolist(),
        'y': np.array(data_df_normalized.iloc[:, pos].values).tolist(),
        'name': str(column),
        'marker': {'color': colors2[pos]},
        'type': 'bar',
    }
with open('../data/birth_work_normalized.json', 'w') as outfile:
    json.dump(data_normalized, outfile, ensure_ascii=True)
# heatmap
# Single heatmap trace built from the raw-count table: column labels on
# x, index labels on y, one list of cell values per table row in z.
x = np.array(data_df.columns).tolist()
y = np.array(data_df.index).tolist()
z = [
    np.array(data_df.iloc[row, :].values).tolist()
    for row in range(len(data_df.index))
]
data = {
    0: {
        'x': x,
        'y': y,
        'z': z,
        'type': 'heatmap',
        # [position in 0..1, colour] stops of the colour scale.
        'colorscale': [
            [0, 'rgb(4, 4, 21)'],
            [0.35, 'rgb(98,24,127)'],
            [0.5, 'rgb(176,52,122)'],
            [0.6, 'rgb(250, 129, 94)'],
            [0.7, 'rgb(254,185,127)'],
            [1, 'rgb(252, 234, 172)'],
        ],
    }
}
with open('../data/birth_work_heatmap.json', 'w') as outfile:
    json.dump(data, outfile, ensure_ascii=True)
# Cross-tabulate `livesin` against `studwhat`: raw counts, plus shares
# normalised within each `livesin` row.
data_df = pd.crosstab(df_live_stud.livesin, df_live_stud.studwhat)
data_df_normalized = pd.crosstab(
    df_live_stud.livesin, df_live_stud.studwhat, normalize='index'
)

# One bar trace per `studwhat` column, keyed by column position.
data = {}
for pos, column in enumerate(data_df.columns):
    data[pos] = {
        'x': np.array(data_df.index).tolist(),
        'y': np.array(data_df.iloc[:, pos].values).tolist(),
        'name': str(column),
        'marker': {'color': colors1[pos]},
        'type': 'bar',
    }
with open('../data/live_stud.json', 'w') as outfile:
    json.dump(data, outfile, ensure_ascii=True)
# One bar trace per `studwhat` column of the row-normalised table, keyed
# by column position. The traces are collected in `data_normalized`
# (not in the raw-count `data` dict) so the exported file can only ever
# contain normalised values.
data_normalized = {}
for pos, column in enumerate(data_df_normalized.columns):
    data_normalized[pos] = {
        'x': np.array(data_df_normalized.index).tolist(),
        'y': np.array(data_df_normalized.iloc[:, pos].values).tolist(),
        'name': str(column),
        'marker': {'color': colors1[pos]},
        'type': 'bar',
    }
with open('../data/live_stud_normalized.json', 'w') as outfile:
    json.dump(data_normalized, outfile, ensure_ascii=True)
# heatmap
# Single heatmap trace built from the raw-count table: column labels on
# x, index labels on y, one list of cell values per table row in z.
x = np.array(data_df.columns).tolist()
y = np.array(data_df.index).tolist()
z = [
    np.array(data_df.iloc[row, :].values).tolist()
    for row in range(len(data_df.index))
]
data = {
    0: {
        'x': x,
        'y': y,
        'z': z,
        'type': 'heatmap',
        # [position in 0..1, colour] stops of the colour scale.
        'colorscale': [
            [0, 'rgb(4, 4, 21)'],
            [0.35, 'rgb(98,24,127)'],
            [0.5, 'rgb(176,52,122)'],
            [0.6, 'rgb(250, 129, 94)'],
            [0.7, 'rgb(254,185,127)'],
            [1, 'rgb(252, 234, 172)'],
        ],
    }
}
with open('../data/live_stud_heatmap.json', 'w') as outfile:
    json.dump(data, outfile, ensure_ascii=True)
# Cross-tabulate `livesin` against `workwhat`: raw counts, plus shares
# normalised within each `livesin` row.
data_df = pd.crosstab(df_live_work.livesin, df_live_work.workwhat)
data_df_normalized = pd.crosstab(
    df_live_work.livesin, df_live_work.workwhat, normalize='index'
)

# One bar trace per `workwhat` column, keyed by column position.
data = {}
for pos, column in enumerate(data_df.columns):
    data[pos] = {
        'x': np.array(data_df.index).tolist(),
        'y': np.array(data_df.iloc[:, pos].values).tolist(),
        'name': str(column),
        'marker': {'color': colors2[pos]},
        'type': 'bar',
    }
with open('../data/live_work.json', 'w') as outfile:
    json.dump(data, outfile, ensure_ascii=True)
# One bar trace per `workwhat` column of the row-normalised table, keyed
# by column position. The traces are collected in `data_normalized`
# (not in the raw-count `data` dict) so the exported file can only ever
# contain normalised values.
data_normalized = {}
for pos, column in enumerate(data_df_normalized.columns):
    data_normalized[pos] = {
        'x': np.array(data_df_normalized.index).tolist(),
        'y': np.array(data_df_normalized.iloc[:, pos].values).tolist(),
        'name': str(column),
        'marker': {'color': colors2[pos]},
        'type': 'bar',
    }
with open('../data/live_work_normalized.json', 'w') as outfile:
    json.dump(data_normalized, outfile, ensure_ascii=True)
# heatmap
# Single heatmap trace built from the raw-count table: column labels on
# x, index labels on y, one list of cell values per table row in z.
x = np.array(data_df.columns).tolist()
y = np.array(data_df.index).tolist()
z = [
    np.array(data_df.iloc[row, :].values).tolist()
    for row in range(len(data_df.index))
]
data = {
    0: {
        'x': x,
        'y': y,
        'z': z,
        'type': 'heatmap',
        # [position in 0..1, colour] stops of the colour scale.
        'colorscale': [
            [0, 'rgb(4, 4, 21)'],
            [0.35, 'rgb(98,24,127)'],
            [0.5, 'rgb(176,52,122)'],
            [0.6, 'rgb(250, 129, 94)'],
            [0.7, 'rgb(254,185,127)'],
            [1, 'rgb(252, 234, 172)'],
        ],
    }
}
with open('../data/live_work_heatmap.json', 'w') as outfile:
    json.dump(data, outfile, ensure_ascii=True)
# Cross-tabulate `studwhat` against `workwhat`: raw counts, plus shares
# normalised within each `studwhat` row.
data_df = pd.crosstab(df_stud_work.studwhat, df_stud_work.workwhat)
data_df_normalized = pd.crosstab(
    df_stud_work.studwhat, df_stud_work.workwhat, normalize='index'
)

# One bar trace per `workwhat` column, keyed by column position.
data = {}
for pos, column in enumerate(data_df.columns):
    data[pos] = {
        'x': np.array(data_df.index).tolist(),
        'y': np.array(data_df.iloc[:, pos].values).tolist(),
        'name': str(column),
        'marker': {'color': colors2[pos]},
        'type': 'bar',
    }
with open('../data/stud_work.json', 'w') as outfile:
    json.dump(data, outfile, ensure_ascii=True)
# One bar trace per `workwhat` column of the row-normalised table, keyed
# by column position. The traces are collected in `data_normalized`
# (not in the raw-count `data` dict) so the exported file can only ever
# contain normalised values.
data_normalized = {}
for pos, column in enumerate(data_df_normalized.columns):
    data_normalized[pos] = {
        'x': np.array(data_df_normalized.index).tolist(),
        'y': np.array(data_df_normalized.iloc[:, pos].values).tolist(),
        'name': str(column),
        'marker': {'color': colors2[pos]},
        'type': 'bar',
    }
with open('../data/stud_work_normalized.json', 'w') as outfile:
    json.dump(data_normalized, outfile, ensure_ascii=True)
# heatmap
# Single heatmap trace built from the raw-count table: column labels on
# x, index labels on y, one list of cell values per table row in z.
x = np.array(data_df.columns).tolist()
y = np.array(data_df.index).tolist()
z = [
    np.array(data_df.iloc[row, :].values).tolist()
    for row in range(len(data_df.index))
]
data = {
    0: {
        'x': x,
        'y': y,
        'z': z,
        'type': 'heatmap',
        # [position in 0..1, colour] stops of the colour scale.
        'colorscale': [
            [0, 'rgb(4, 4, 21)'],
            [0.35, 'rgb(98,24,127)'],
            [0.5, 'rgb(176,52,122)'],
            [0.6, 'rgb(250, 129, 94)'],
            [0.7, 'rgb(254,185,127)'],
            [1, 'rgb(252, 234, 172)'],
        ],
    }
}
with open('../data/stud_work_heatmap.json', 'w') as outfile:
    json.dump(data, outfile, ensure_ascii=True)