import requests
import pandas as pd
import sqlite3
from tqdm import tqdm
# Kod 4
r = requests.get('http://api.gios.gov.pl/pjp-api/rest/station/findAll')
stations = pd.io.json.json_normalize(r.json())
print(stations.shape)
(178, 10)
# you have to create path to db manually
conn = sqlite3.connect('../output/aqip.db')
c = conn.cursor()
# Kod 1
# stations
create_query = '''
CREATE TABLE IF NOT EXISTS stations (
id integer PRIMARY KEY,
addressStreet text,
city_commune_communeName text,
city_commune_districtName text,
city_commune_provinceName text,
city_id integer,
city_name text,
gegrLat real,
gegrLon real,
stationName text)
'''
c.execute(create_query)
<sqlite3.Cursor at 0x7facfdc0eab0>
# Kod 2
# sensors
create_query = '''
CREATE TABLE IF NOT EXISTS sensors (
id integer PRIMARY KEY,
param_idParam integer,
param_paramCode text,
param_paramFormula text,
param_paramName text,
stationId integer)
'''
c.execute(create_query)
<sqlite3.Cursor at 0x7facfdc0eab0>
# Kod 3
# readings
create_query = '''
CREATE TABLE IF NOT EXISTS readings (
id integer,
datetime text,
value real,
PRIMARY KEY (id, datetime))
'''
c.execute(create_query)
<sqlite3.Cursor at 0x7facfdc0eab0>
# Kod 5
for index, station in stations.iterrows():
c.execute("INSERT OR REPLACE INTO stations VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
([station["id"],
station["addressStreet"],
station["city.commune.communeName"],
station["city.commune.districtName"],
station["city.commune.provinceName"],
station["city.id"],
station["city.name"],
station["gegrLat"],
station["gegrLon"],
station["stationName"]]))
conn.commit()
# Kod 6
for station_id in tqdm(pd.read_sql_query("select id from stations", conn).values):
station_id = station_id[0]
#print(station_id)
r = requests.get('http://api.gios.gov.pl/pjp-api/rest/station/sensors/' + str(station_id))
sensors = pd.io.json.json_normalize(r.json())
for index, sensor in sensors.iterrows():
#print(sensor)
c.execute("INSERT OR REPLACE INTO sensors VALUES (?, ?, ?, ?, ?, ?)",
([sensor["id"],
sensor["param.idParam"],
sensor["param.paramCode"],
sensor["param.paramFormula"],
sensor["param.paramName"],
sensor["stationId"]]))
conn.commit()
#break
100%|██████████| 179/179 [00:46<00:00, 3.85it/s]
# Kod 7
lengths = []
for sensor_id in tqdm(pd.read_sql_query("select id from sensors", conn).values):
sensor_id = sensor_id[0]
#print(sensor_id)
r = requests.get('http://api.gios.gov.pl/pjp-api/rest/data/getData/' + str(sensor_id))
readings = pd.io.json.json_normalize(r.json())
readingsFrame = pd.DataFrame()
readingsFrame["datetime"] = [d[u'date'] for d in readings["values"].values.item()]
readingsFrame["value"] = [d[u'value'] for d in readings["values"].values.item()]
length = len(readingsFrame)
lengths.append(length)
#if length == 0:
# print(sensor_id)
for index, reading in readingsFrame.iterrows():
#print(reading["datetime"])
#print(reading["value"])
c.execute("INSERT OR REPLACE INTO readings VALUES (?, ?, ?)", ([int(sensor_id),
reading["datetime"],
reading["value"],
]))
conn.commit()
#break
100%|██████████| 715/715 [04:22<00:00, 2.73it/s]
pd.Series(lengths).value_counts()
60 693 0 21 61 1 dtype: int64
%%time
# Kod 8
query = """
SELECT readings.id as sensor_id, datetime, value, param_paramFormula, gegrLat, gegrLon, stationId as station_id
FROM readings, sensors, stations
WHERE readings.id = sensors.id
AND sensors.stationId = stations.id
"""
gios_data = pd.read_sql_query(query, conn)
gios_data.sort_values(by = ["sensor_id", "datetime"], inplace=True)
CPU times: user 469 ms, sys: 21.2 ms, total: 490 ms Wall time: 534 ms
gios_data.head()
sensor_id | datetime | value | param_paramFormula | gegrLat | gegrLon | station_id | |
---|---|---|---|---|---|---|---|
23 | 88 | 2019-02-13 01:00:00 | 15.0055 | NO2 | 50.972167 | 14.941319 | 14 |
22 | 88 | 2019-02-13 02:00:00 | 11.3674 | NO2 | 50.972167 | 14.941319 | 14 |
21 | 88 | 2019-02-13 03:00:00 | 10.1511 | NO2 | 50.972167 | 14.941319 | 14 |
20 | 88 | 2019-02-13 04:00:00 | 10.5415 | NO2 | 50.972167 | 14.941319 | 14 |
19 | 88 | 2019-02-13 05:00:00 | 12.2634 | NO2 | 50.972167 | 14.941319 | 14 |
gios_data.tail()
sensor_id | datetime | value | param_paramFormula | gegrLat | gegrLon | station_id | |
---|---|---|---|---|---|---|---|
58240 | 20916 | 2019-02-16 08:00:00 | 133.9090 | PM2.5 | 51.808662 | 15.708193 | 11295 |
58239 | 20916 | 2019-02-16 09:00:00 | 164.0960 | PM2.5 | 51.808662 | 15.708193 | 11295 |
58238 | 20916 | 2019-02-16 10:00:00 | 120.0040 | PM2.5 | 51.808662 | 15.708193 | 11295 |
58237 | 20916 | 2019-02-16 11:00:00 | 65.6273 | PM2.5 | 51.808662 | 15.708193 | 11295 |
58236 | 20916 | 2019-02-16 12:00:00 | 25.7272 | PM2.5 | 51.808662 | 15.708193 | 11295 |