In [1]:
import requests
import pandas as pd
import sqlite3
from tqdm import tqdm
In [2]:
# Kod 4
r = requests.get('http://api.gios.gov.pl/pjp-api/rest/station/findAll')
stations = pd.io.json.json_normalize(r.json())
In [3]:
print(stations.shape)
(178, 10)
In [4]:
# you have to create path to db manually
conn = sqlite3.connect('../output/aqip.db')
c = conn.cursor()
In [5]:
# Kod 1
# stations
create_query = '''
CREATE TABLE IF NOT EXISTS stations (
id integer PRIMARY KEY, 
addressStreet text, 
city_commune_communeName text,
city_commune_districtName text,
city_commune_provinceName text,
city_id integer,
city_name text,
gegrLat real,
gegrLon real,
stationName text)
'''
c.execute(create_query)
Out[5]:
<sqlite3.Cursor at 0x7facfdc0eab0>
In [6]:
# Kod 2
# sensors
create_query = '''
CREATE TABLE IF NOT EXISTS sensors (
id integer PRIMARY KEY,
param_idParam integer,
param_paramCode text,
param_paramFormula text,
param_paramName text, 
stationId integer)
'''
c.execute(create_query)
Out[6]:
<sqlite3.Cursor at 0x7facfdc0eab0>
In [7]:
# Kod 3
# readings
create_query = '''
CREATE TABLE IF NOT EXISTS readings (
id integer,
datetime text,
value real,
PRIMARY KEY (id, datetime))
'''
c.execute(create_query)
Out[7]:
<sqlite3.Cursor at 0x7facfdc0eab0>
In [8]:
# Kod 5
for index, station in stations.iterrows():
    c.execute("INSERT OR REPLACE INTO stations VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
              ([station["id"], 
                station["addressStreet"],
                station["city.commune.communeName"],
                station["city.commune.districtName"],
                station["city.commune.provinceName"], 
                station["city.id"], 
                station["city.name"], 
                station["gegrLat"], 
                station["gegrLon"], 
                station["stationName"]]))
conn.commit()
In [9]:
# Kod 6
for station_id in tqdm(pd.read_sql_query("select id from stations", conn).values):
    station_id = station_id[0]
    #print(station_id)
    r = requests.get('http://api.gios.gov.pl/pjp-api/rest/station/sensors/' + str(station_id))
    sensors = pd.io.json.json_normalize(r.json())

    for index, sensor in sensors.iterrows():
        #print(sensor)
        c.execute("INSERT OR REPLACE INTO sensors VALUES (?, ?, ?, ?, ?, ?)", 
                  ([sensor["id"], 
                    sensor["param.idParam"],
                    sensor["param.paramCode"],
                    sensor["param.paramFormula"],
                    sensor["param.paramName"], 
                    sensor["stationId"]]))
    conn.commit()
    #break
100%|██████████| 179/179 [00:46<00:00,  3.85it/s]
In [10]:
# Kod 7
lengths = []
for sensor_id in tqdm(pd.read_sql_query("select id from sensors", conn).values):
    sensor_id = sensor_id[0]
    #print(sensor_id)
    r = requests.get('http://api.gios.gov.pl/pjp-api/rest/data/getData/' + str(sensor_id))
    readings = pd.io.json.json_normalize(r.json())
    
    readingsFrame = pd.DataFrame()
    
    readingsFrame["datetime"] = [d[u'date'] for d in readings["values"].values.item()]
    readingsFrame["value"] = [d[u'value'] for d in readings["values"].values.item()]
    
    length = len(readingsFrame)
    lengths.append(length)
    #if length == 0:
    #    print(sensor_id)
    
    for index, reading in readingsFrame.iterrows():
        #print(reading["datetime"])
        #print(reading["value"])
        
        c.execute("INSERT OR REPLACE INTO readings VALUES (?, ?, ?)", ([int(sensor_id), 
                                                               reading["datetime"],
                                                               reading["value"],
                                                               ]))
    conn.commit()
    #break
100%|██████████| 715/715 [04:22<00:00,  2.73it/s]
In [11]:
pd.Series(lengths).value_counts()
Out[11]:
60    693
0      21
61      1
dtype: int64

Reading data from database

In [13]:
%%time
# Kod 8


query = """
SELECT readings.id as sensor_id, datetime, value, param_paramFormula, gegrLat, gegrLon, stationId as station_id
FROM readings, sensors, stations
WHERE readings.id = sensors.id
 AND sensors.stationId = stations.id
"""

gios_data = pd.read_sql_query(query, conn)

gios_data.sort_values(by = ["sensor_id", "datetime"], inplace=True)
CPU times: user 469 ms, sys: 21.2 ms, total: 490 ms
Wall time: 534 ms
In [14]:
gios_data.head()
Out[14]:
sensor_id datetime value param_paramFormula gegrLat gegrLon station_id
23 88 2019-02-13 01:00:00 15.0055 NO2 50.972167 14.941319 14
22 88 2019-02-13 02:00:00 11.3674 NO2 50.972167 14.941319 14
21 88 2019-02-13 03:00:00 10.1511 NO2 50.972167 14.941319 14
20 88 2019-02-13 04:00:00 10.5415 NO2 50.972167 14.941319 14
19 88 2019-02-13 05:00:00 12.2634 NO2 50.972167 14.941319 14
In [15]:
gios_data.tail()
Out[15]:
sensor_id datetime value param_paramFormula gegrLat gegrLon station_id
58240 20916 2019-02-16 08:00:00 133.9090 PM2.5 51.808662 15.708193 11295
58239 20916 2019-02-16 09:00:00 164.0960 PM2.5 51.808662 15.708193 11295
58238 20916 2019-02-16 10:00:00 120.0040 PM2.5 51.808662 15.708193 11295
58237 20916 2019-02-16 11:00:00 65.6273 PM2.5 51.808662 15.708193 11295
58236 20916 2019-02-16 12:00:00 25.7272 PM2.5 51.808662 15.708193 11295
In [ ]: