#!/usr/bin/env python # coding: utf-8 # # ODM2 Example 2: Load Data into ODM2 from an Excel Template File # This example shows how to load data from an ODM2 Specimen Excel Template into an ODM2 SQLite database instance using the ODM2 YODA Tools library and the ODM2 Python application programming interface (API). This example uses SQLite for the database because it doesn't require a server. However, the process for creating ODM2 databases using other relational database management systems is very similar. The ODM2 Python API and YODA Tools demonstrated here can be used with ODM2 databases implemented in: # * Microsoft SQL Server # * MySQL # * PostgresSQL # * SQLite # # Details of the Specimen Excel Template (and others) for ODM2 can be found at: https://github.com/ODM2/YODA-File/tree/master/excel_templates. These Excel Templates were designed for investigators to enter their observations and metadata for parsing into an ODM2 database. We designed these templates under the premise that most scientists have and use Excel and can load thier data into the teamplate. # # Details of the YODA Tools libraries can be found at: https://github.com/ODM2/YODA-Tools. YODA Tools is a code base for working with ODM2 related files, loading them into ODM2 instances, and exporting from ODM2 to files. # # Details of the ODM2 Python API can be found at: https://github.com/ODM2/ODM2PythonAPI. The ODM2 Python API is an application programming interface for ODM2 databases that is cross platform and cross database compatible. # First, load the required libraries. YODA Tools requires the openpyxl package. This package is most likely not already installed in your user space/environment on the Jupyter Server, so run the following code to installe it and then load the rest of the libraries. # # **NOTE: If you have already installed openpyxl using the following code block and you execute it again, you may get an error or message saying that the requirement is already satisfied. You can ignore this and move to the next code block.** # In[ ]: # Run the following command to install the openpyxl module get_ipython().system('pip install openpyxl') # In[ ]: import os import sqlite3 import sys sys.path.insert(0, "code") from yodatools.converter.Inputs.ExcelInput import ExcelInput from yodatools.converter.Outputs.dbOutput import dbOutput from yodatools.converter.Outputs.yamlOutput import yamlOutput from IPython.core.display import display, HTML print("Done loading libraries!") # ### Create a New ODM2 Database to Load Data Into # # Create a blank ODM2 database into which we can load data. This is the same process from Example 1. # In[ ]: # First check to see if the ODM2 SQLite file already exists from previous runs of this example. If so, delete it. if os.path.isfile('data/ODM2_Example2.sqlite'): os.remove('data/ODM2_Example2.sqlite') # Create a new SQLite database and get a cursor conn = sqlite3.connect('data/ODM2_Example2.sqlite') c = conn.cursor() # Open the ODM2 DDL SQL file sqlString = open('code/ODM2_for_SQLite.sql', 'r').read() # Execute the DDL SQL script on the blank SQLite database c.executescript(sqlString) # Close the connection to the database conn.close() # Load the controlled vocabularies get_ipython().run_line_magic('run', 'code/cvload.py sqlite:///data/ODM2_Example2.sqlite') print "Done creating ODM2 database!" # ### Read the Excel Template Data File # # The HydroShare resource containing this notebook also contains an ODM2 Excel Template file in the "data" directory that contains a bunch of data derived from water quality samples collected at monitoring sites that are part of our iUTAH Gradients Along Mountain to Urban Transitions (GAMUT) monitoring network. This code opens the Excel template file and parses it using YODA Tools and the ODM2 Python API. Once the Excel Template file has been parsed, all of the data in the Excel file are available in the ODM2 Python API objects and can be accessed via code. # # **NOTE: This Excel template file contains a fairly large number of samples, so it takes a few seconds to parse.** # In[ ]: excel = ExcelInput('data/YODA_iUTAH_Specimen_Example_small.xlsx') excel.parse() session = excel.sendODM2Session() print("Done parsing Excel file!") # Provide a link to download the Excel file that was just loaded into the API print("\nYou can download the Excel file that was just loaded using the following link:\n") dbname = 'YODA_iUTAH_Specimen_Example_small.xlsx' display(HTML('%s' % ('data/%s' % dbname, dbname))) # ### Get Data from the Current API Session # # At this point, the data from the Excel Template file now exist in memory in the ODM2 Python API objects. We can do several things with the data now, including manipulating it or using it for visualization or analysis. We could write the data out to an operational ODM2 database, or we can write the data out to a YODA file. # # The following is a quick example of a simple query to the current session where the data are now held in memory. Use **session.query(ODM2Entity).all( )** where "session" refers to the current API session and "ODM2Entity" is the name of the ODM2 entity whose objects you want to return. In this example, we'll query all of the **Methods** from the session. If you want to return other objects, you could replace "Methods" with "Variables," "People," "ProcessingLevels," etc. and then change the attributes that get printed out to the console. # In[ ]: # Get all of the Methods that were loaded from the Excel file from odm2api.ODM2.models import * methods = session.query(Methods).all() # Print some of the attributes of the methods for x in methods: print("MethodCode: " + x.MethodCode + ", MethodName: " + x.MethodName + ", MethodTypeCV: " + x.MethodTypeCV) # ### Write the Data to the ODM2 Database # # Now that the Excel template file has been parsed, all of the data exist in the API objects. The following code actually writes the data to the empty ODM2 SQLite database created above. Although I'm using SQLite for this example to avoid needing a separate database server, this functionality will also work with Microsoft SQL Server, MySQL, and PostgresSQL. # # **NOTE: This Excel template file contains a fairly large number of samples, so it will take a bit to write it all to the SQLite database.** # # You can download the ODM2 SQLite file that has been populated with the data from the Excel Template file using the link that is printed when you run this code. **The third notebook example (ODM2_Example2.ipynb) shows how to connect to a populated ODM2 database and retrieve data using the ODM2 Python API.** # In[ ]: # Create a connection to the ODM2 database created above connection_string = 'sqlite:///data/ODM2_Example2.sqlite' # Write the data to the database do = dbOutput() do.save(session, connection_string) # Provide a link to the ODM2 SQLite file that the data were written to print("\nYou can download the ODM2 SQLite database populated with data using the following link:\n") dbname = 'ODM2_Example2.sqlite' display(HTML('%s' % ('data/%s' % dbname, dbname))) # ### Write the Data to a YODA File # The data contained in the API objects can also be written out to a YAML Observations Data Archive (YODA) file. After running the following code, you can download and examine the YAML file using the link that is printed. # In[ ]: # Write the output to the YODA file yo = yamlOutput() yo.save(session, 'data/ODM2_Example2.yaml') # Provide a link to download the YODA file created print("\nYou can download the populated YODA file using the following link:\n") dbname = 'ODM2_Example2.yaml' display(HTML('%s' % ('data/%s' % dbname, dbname)))