In [1]:
import os, datetime
import pandas as pd
import pickle
import csv
import xlrd
from pandas import Series, DataFrame, Panel
from datetime import datetime, date, timedelta
from os import sys
import numpy as np
import matplotlib.pyplot as plt
import folium
In [123]:
# Load the file named 'sparql' from the current working directory as CSV.
# NOTE(review): presumably an export of a DBpedia SPARQL query (the values
# shown in later cells are dbpedia.org resource URIs) -- confirm provenance.
wdf1 = pd.read_csv(filepath_or_buffer='sparql')
In [124]:
# Summary statistics (count / unique / top / freq) for the `cause` column.
wdf1['cause'].describe()
Out[124]:
count                                                  3929
unique                                                  325
top       http://dbpedia.org/resource/Myocardial_infarction
freq                                                    594
Name: cause, dtype: object
In [125]:
# Summary statistics (count / unique / top / freq) for the `occupation` column.
wdf1['occupation'].describe()
Out[125]:
count                                  3929
unique                                 2615
top       http://dbpedia.org/resource/Actor
freq                                    341
Name: occupation, dtype: object
In [129]:
# Frequency of every distinct `occupation` value, most common first.
# NOTE(review): the displayed result contains URIs ending in `__1`, `__2`, ...
# (e.g. `Friedrich_Akel__1`) that look like per-person placeholder resources
# rather than real occupations -- confirm and consider filtering them out.
wdf1['occupation'].value_counts()
Out[129]:
http://dbpedia.org/resource/Actor                  341
http://dbpedia.org/resource/Film_director           45
http://dbpedia.org/resource/Comedian                45
http://dbpedia.org/resource/Child_actor             32
http://dbpedia.org/resource/Writer                  31
http://dbpedia.org/resource/Singing                 30
http://dbpedia.org/resource/Film_producer           29
http://dbpedia.org/resource/Journalist              28
http://dbpedia.org/resource/Screenwriter            27
http://dbpedia.org/resource/Businessperson          23
http://dbpedia.org/resource/Presenter               21
http://dbpedia.org/resource/Film                    21
http://dbpedia.org/resource/Television_producer     19
http://dbpedia.org/resource/Television              18
http://dbpedia.org/resource/Voice_acting            18
...
http://dbpedia.org/resource/Nora_Denney__1                      1
http://dbpedia.org/resource/Nate_Watt__1                        1
http://dbpedia.org/resource/Landscape_architecture              1
http://dbpedia.org/resource/Confidence_trick                    1
http://dbpedia.org/resource/Friedrich_Akel__3                   1
http://dbpedia.org/resource/Friedrich_Akel__2                   1
http://dbpedia.org/resource/Friedrich_Akel__1                   1
http://dbpedia.org/resource/Geoffrey_Hughes__1                  1
http://dbpedia.org/resource/Lee_Matasi__1                       1
http://dbpedia.org/resource/Alex_Macdonald_(trade_unionist)__1    1
http://dbpedia.org/resource/James_H._Snook__1                   1
http://dbpedia.org/resource/Peter_Ling__1                       1
http://dbpedia.org/resource/Floristry                           1
http://dbpedia.org/resource/Donald_Justin_Wolfram__1            1
http://dbpedia.org/resource/Pete_Postlethwaite__1               1
Length: 2615, dtype: int64
In [137]:
# Ten most frequent values of the `cause` column.
wdf1['cause'].value_counts().head(10)
Out[137]:
http://dbpedia.org/resource/Myocardial_infarction      594
http://dbpedia.org/resource/Cancer                     312
http://dbpedia.org/resource/Pneumonia                  201
http://dbpedia.org/resource/Stroke                     167
http://dbpedia.org/resource/Lung_cancer                167
http://dbpedia.org/resource/Heart_failure              165
http://dbpedia.org/resource/Suicide                    146
http://dbpedia.org/resource/Pancreatic_cancer           88
http://dbpedia.org/resource/Murder                      77
http://dbpedia.org/resource/Death_by_natural_causes     72
dtype: int64
In [136]:
# Ten most frequent occupations among rows whose `cause` is Lung_cancer.
wdf1.loc[
    wdf1['cause'] == 'http://dbpedia.org/resource/Lung_cancer',
    'occupation',
].value_counts().head(10)
Out[136]:
http://dbpedia.org/resource/Actor                  25
http://dbpedia.org/resource/Writer                  3
http://dbpedia.org/resource/Comedian                3
http://dbpedia.org/resource/Presenter               2
http://dbpedia.org/resource/Television              2
http://dbpedia.org/resource/Musician                2
http://dbpedia.org/resource/Television_producer     2
http://dbpedia.org/resource/Film_director           2
http://dbpedia.org/resource/Dance                   2
http://dbpedia.org/resource/Voice_acting            2
dtype: int64
In [138]:
# Ten most frequent occupations among rows whose `cause` is Murder.
wdf1.loc[
    wdf1['cause'] == 'http://dbpedia.org/resource/Murder',
    'occupation',
].value_counts().head(10)
Out[138]:
http://dbpedia.org/resource/Maryknoll                    2
http://dbpedia.org/resource/Police_officer               2
http://dbpedia.org/resource/Rum-running                  2
http://dbpedia.org/resource/Crime                        1
http://dbpedia.org/resource/Skateboarding                1
http://dbpedia.org/resource/Dean_O'Banion__2             1
http://dbpedia.org/resource/Harry_Lazarus__1             1
http://dbpedia.org/resource/Student                      1
http://dbpedia.org/resource/Singing                      1
http://dbpedia.org/resource/John_Williams_Gunnison__1    1
dtype: int64
In [140]:
# Full rows where `occupation` is Skateboarding and `cause` is Murder.
wdf1.loc[
    (wdf1['occupation'] == 'http://dbpedia.org/resource/Skateboarding')
    & (wdf1['cause'] == 'http://dbpedia.org/resource/Murder')
]
Out[140]:
person occupation cause
2227 http://dbpedia.org/resource/Lee_Matasi http://dbpedia.org/resource/Skateboarding http://dbpedia.org/resource/Murder

1 rows × 3 columns