from SPARQLWrapper import SPARQLWrapper, JSON
from rdflib import ConjunctiveGraph, Namespace, Literal, RDF, RDFS, BNode, URIRef, XSD, Variable
import operator
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import urllib2, StringIO, csv
import re
%matplotlib inline
# Named-graph URI pattern; the literal 'CUBE' is a placeholder that gets
# replaced by a cube name when a query is built.
NG_TEMPLATE = 'http://lod.cedar-project.nl/resource/r1/CUBE'
# SPARQL endpoint that the queries are sent to.
END_POINT = 'http://lod.cedar-project.nl:8080/sparql/cedar'
# Remote text file; every line, once stripped, is taken as one cube name.
url = 'https://raw.githubusercontent.com/cgueret/Harmonize/master/cubes.txt'
cube_listing = urllib2.urlopen(url).read()
cubes = [entry.strip() for entry in StringIO.StringIO(cube_listing)]
Teachers have the HISCO code 13000. This question is a simplified version of the one found on slide 30 of this presentation.
data_years=[]
data_count=[]
for cube in cubes:
(cube_type, cube_year) = cube.split('-')
if cube_type != 'BRT':
continue
data_years.append(int(cube_year))
named_graph = NG_TEMPLATE.replace('CUBE', cube)
sparql = SPARQLWrapper(END_POINT)
query = """
prefix cedar: <http://cedar.example.org/ns#>
prefix qb: <http://purl.org/linked-data/cube#>
select (sum(?s) as ?total) from <GRAPH> where {
?o a qb:Observation.
?o cedar:occupation cedar:hisco-13000.
?o cedar:populationSize ?s.
}
""".replace('GRAPH',named_graph)
sparql.setQuery(query)
sparql.setReturnFormat(JSON)
result = sparql.query().convert()["results"]["bindings"][0]
total = 0
if 'total' in result:
total = int(result['total']['value'])
data_count.append(total)
print data_years, data_count
[1889, 1899, 1909, 1920, 1930, 1947] [17653, 9170, 267, 0, 0, 0]
# Draw the per-year totals twice with the same half-transparent styling:
# once as a connected line and once as individual points.
half_transparent = {'alpha': 0.5}
plt.plot(data_years, data_count, **half_transparent)
plt.scatter(data_years, data_count, **half_transparent)
plt.show()
data_years=[]
data_count=[]
for cube in cubes:
(cube_type, cube_year) = cube.split('-')
if cube_type != 'VT':
continue
data_years.append(int(cube_year))
named_graph = NG_TEMPLATE.replace('CUBE', cube)
sparql = SPARQLWrapper(END_POINT)
query = """
prefix cedar: <http://cedar.example.org/ns#>
prefix qb: <http://purl.org/linked-data/cube#>
prefix sdmx-dimension: <http://purl.org/linked-data/sdmx/2009/dimension#>
prefix sdmx-code: <http://purl.org/linked-data/sdmx/2009/code#>
select (sum(?s) as ?total) from <GRAPH> where {
?o a qb:Observation.
?o cedar:maritalStatus cedar:marital-Married.
?o sdmx-dimension:sex sdmx-code:sex-V.
?o cedar:populationSize ?s.
}
""".replace('GRAPH',named_graph)
sparql.setQuery(query)
sparql.setReturnFormat(JSON)
result = sparql.query().convert()["results"]["bindings"][0]
total = 0
if 'total' in result:
total = int(result['total']['value'])
data_count.append(total)
print data_years, data_count
[1795, 1830, 1840, 1849, 1859, 1869, 1879, 1889, 1899, 1909, 1920, 1930, 1947, 1971] [0, 3782337, 3630140, 5070644, 7513441, 6504906, 3541892, 1263520, 14269943, 10292988, 12812929, 22087751, 12975842, 0]
# Draw the per-year totals twice with the same half-transparent styling:
# once as a connected line and once as individual points.
half_transparent = {'alpha': 0.5}
plt.plot(data_years, data_count, **half_transparent)
plt.scatter(data_years, data_count, **half_transparent)
plt.show()
data_years=[]
data_count=[]
for cube in cubes:
(cube_type, cube_year) = cube.split('-')
if cube_type != 'VT':
continue
data_years.append(int(cube_year))
named_graph = NG_TEMPLATE.replace('CUBE', cube)
sparql = SPARQLWrapper(END_POINT)
query = """
prefix cedar: <http://cedar.example.org/ns#>
prefix qb: <http://purl.org/linked-data/cube#>
prefix sdmx-dimension: <http://purl.org/linked-data/sdmx/2009/dimension#>
prefix sdmx-code: <http://purl.org/linked-data/sdmx/2009/code#>
select (sum(?s) as ?total) from <GRAPH> where {
?o a qb:Observation.
?o cedar:city cedar:ac-11150.
?o cedar:populationSize ?s.
}
""".replace('GRAPH',named_graph)
sparql.setQuery(query)
sparql.setReturnFormat(JSON)
result = sparql.query().convert()["results"]["bindings"][0]
total = 0
if 'total' in result:
total = int(result['total']['value'])
data_count.append(total)
print data_years, data_count
[1795, 1830, 1840, 1849, 1859, 1869, 1879, 1889, 1899, 1909, 1920, 1930, 1947, 1971] [2389519, 606525, 0, 1470085, 8212669, 5137603, 4978346, 6423682, 11996651, 7260189, 16510509, 23924726, 2186887, 7358562]
# Draw the per-year totals twice with the same half-transparent styling:
# once as a connected line and once as individual points.
half_transparent = {'alpha': 0.5}
plt.plot(data_years, data_count, **half_transparent)
plt.scatter(data_years, data_count, **half_transparent)
plt.show()