from SPARQLWrapper import SPARQLWrapper, JSON from rdflib import ConjunctiveGraph, Namespace, Literal, RDF, RDFS, BNode, URIRef, XSD, Variable import operator import numpy as np import networkx as nx import matplotlib.pyplot as plt import urllib2, StringIO, csv import re %matplotlib inline NG_TEMPLATE = 'http://lod.cedar-project.nl/resource/r1/CUBE' END_POINT = 'http://lod.cedar-project.nl:8080/sparql/cedar' url = 'https://raw.githubusercontent.com/cgueret/Harmonize/master/cubes.txt' cubes = [cube.strip() for cube in StringIO.StringIO(urllib2.urlopen(url).read())] data_years=[] data_count=[] for cube in cubes: (cube_type, cube_year) = cube.split('-') if cube_type != 'BRT': continue data_years.append(int(cube_year)) named_graph = NG_TEMPLATE.replace('CUBE', cube) sparql = SPARQLWrapper(END_POINT) query = """ prefix cedar: prefix qb: select (sum(?s) as ?total) from where { ?o a qb:Observation. ?o cedar:occupation cedar:hisco-13000. ?o cedar:populationSize ?s. } """.replace('GRAPH',named_graph) sparql.setQuery(query) sparql.setReturnFormat(JSON) result = sparql.query().convert()["results"]["bindings"][0] total = 0 if 'total' in result: total = int(result['total']['value']) data_count.append(total) print data_years, data_count plt.plot(data_years, data_count, alpha=0.5) plt.scatter(data_years, data_count, alpha=0.5) plt.show() data_years=[] data_count=[] for cube in cubes: (cube_type, cube_year) = cube.split('-') if cube_type != 'VT': continue data_years.append(int(cube_year)) named_graph = NG_TEMPLATE.replace('CUBE', cube) sparql = SPARQLWrapper(END_POINT) query = """ prefix cedar: prefix qb: prefix sdmx-dimension: prefix sdmx-code: select (sum(?s) as ?total) from where { ?o a qb:Observation. ?o cedar:maritalStatus cedar:marital-Married. ?o sdmx-dimension:sex sdmx-code:sex-V. ?o cedar:populationSize ?s. } """.replace('GRAPH',named_graph) sparql.setQuery(query) sparql.setReturnFormat(JSON) result = sparql.query().convert()["results"]["bindings"][0] total = 0 if 'total' in result: total = int(result['total']['value']) data_count.append(total) print data_years, data_count plt.plot(data_years, data_count, alpha=0.5) plt.scatter(data_years, data_count, alpha=0.5) plt.show() data_years=[] data_count=[] for cube in cubes: (cube_type, cube_year) = cube.split('-') if cube_type != 'VT': continue data_years.append(int(cube_year)) named_graph = NG_TEMPLATE.replace('CUBE', cube) sparql = SPARQLWrapper(END_POINT) query = """ prefix cedar: prefix qb: prefix sdmx-dimension: prefix sdmx-code: select (sum(?s) as ?total) from where { ?o a qb:Observation. ?o cedar:city cedar:ac-11150. ?o cedar:populationSize ?s. } """.replace('GRAPH',named_graph) sparql.setQuery(query) sparql.setReturnFormat(JSON) result = sparql.query().convert()["results"]["bindings"][0] total = 0 if 'total' in result: total = int(result['total']['value']) data_count.append(total) print data_years, data_count plt.plot(data_years, data_count, alpha=0.5) plt.scatter(data_years, data_count, alpha=0.5) plt.show()