In this notebook, we will use the ExcelImporter
to get all data from the spreadsheet, but will have to write strategies to understand the data ourselves.
from brightway2 import *
from stats_arrays import *
Start a new project, and install base data.
projects.set_current("excel import")
bw2setup()
The file we want to import has links to ecoinvent 3.1, so we will need to import this database.
ei = SingleOutputEcospold2Importer(
"/Users/cmutel/Documents/LCA Documents/Ecoinvent/3.1/cutoff/datasets",
"ecoinvent 3.1 cutoff"
)
ei.apply_strategies()
ei.write_database()
No need to keep the importer object around.
ei = None
Import the example spreadsheet data, and let's see what we have:
ex = ExcelImporter("files/example_excel_file.xlsx", "example")
from IPython.display import Image
Image(filename='files/example-excel-file.png')
OK, so we need to:
We could do this in a series of small functions chained together, but maybe it is easier to just process everything in one strategy.
A strategy is a function that transforms your data - in our case, from a list of lists of raw data values to something we can interact with.
uncertainty_mapping = {
0: UndefinedUncertainty.id,
1: LognormalUncertainty.id,
2: NormalUncertainty.id,
3: TriangularUncertainty.id,
4: UniformUncertainty.id
}
PROCESS_INDEX_NUMBER = 3707
PROCESS_INDEX_COL = 0
PROCESS_INDEX_ROW = 1
PROCESS_NAME_ROW = 2
PROCESS_LOCATION_ROW = 3
PROCESS_UNIT_ROW = 5
INPUT_GROUP_COL = 3
OUTPUT_GROUP_COL = 4
PRODUCTION_EXC_TYPE = 0
TECHNOSPHERE_EXC_TYPE = 5
BIOSPHERE_EXC_TYPE = 4
CATEGORY_COL = 7
SUBCATEGORY_COL = 8
EXCHANGE_PRODUCTION_NAME_COL = 5
EXCHANGE_PRODUCT_COL = 12
EXCHANGE_PROCESS_COL = 13
EXCHANGE_LOCATION_COL = 14
EXCHANGE_UNIT_COL = 10
EXCHANGE_AMOUNT_DELTA = 0
EXCHANGE_UNCERTAINTY_TYPE_DELTA = 1
EXCHANGE_SD95_DELTA = 2
EXCHANGE_COMMENT_DELTA = 3
def get_exchange_type(data, row):
if data[row][INPUT_GROUP_COL] == TECHNOSPHERE_EXC_TYPE:
return "technosphere"
elif data[row][OUTPUT_GROUP_COL] == BIOSPHERE_EXC_TYPE:
return "biosphere"
elif data[row][INPUT_GROUP_COL] == BIOSPHERE_EXC_TYPE:
return "biosphere"
elif data[row][OUTPUT_GROUP_COL] == PRODUCTION_EXC_TYPE:
return "production"
else:
raise ValueError
def get_exchange_rows(data):
return [x for x in range(len(data))
if data[x][PROCESS_INDEX_COL]
and data[x][PROCESS_INDEX_COL] != "Index"]
def extract_exchanges(data, col):
return [{
'production name': data[row][EXCHANGE_PRODUCTION_NAME_COL],
'activity name': data[row][EXCHANGE_PROCESS_COL],
'categories': (data[row][CATEGORY_COL], data[row][SUBCATEGORY_COL],),
'type': get_exchange_type(data, row),
'reference product': data[row][EXCHANGE_PRODUCT_COL],
'unit': data[row][EXCHANGE_UNIT_COL],
'location': data[row][EXCHANGE_LOCATION_COL],
'amount': data[row][col + EXCHANGE_AMOUNT_DELTA] or 0,
'uncertainty type': uncertainty_mapping[data[row][col + EXCHANGE_UNCERTAINTY_TYPE_DELTA] or 0],
'sd95': data[row][col + EXCHANGE_SD95_DELTA], # Will need to be transformed
'comment': data[row][col + EXCHANGE_COMMENT_DELTA]
} for row in get_exchange_rows(data)]
def extract_process(data, col, sheet):
return {
'name': data[PROCESS_NAME_ROW][col],
'unit': data[PROCESS_UNIT_ROW][col],
'location': data[PROCESS_LOCATION_ROW][col],
'worksheet': sheet,
'exchanges': extract_exchanges(data, col)
}
def process_sample_worksheet(obj):
ws_name, data = obj
nrows, ncols = len(data), len(data[0])
# Create dictionary of processes referred to by column index
processes = [extract_process(data, col, ws_name)
for col in range(ncols)
if data[PROCESS_INDEX_ROW][col] == PROCESS_INDEX_NUMBER]
return processes
def process_sample_excel_data(data):
return [item for obj in data for item in process_sample_worksheet(obj)]
We can now decide if we want the name to refer to activity name
or production name
; This is actually pretty easy.
def choose_name(data):
"""Get either activity or production name, and set to `name` field."""
for ds in data:
for exc in ds['exchanges']:
exc['name'] = exc['activity name'] or exc['production name']
del exc['production name']
del exc['activity name']
return data
Delete fields where we don't have data:
def clean_blank_fields(data):
"""Delete all exchange fields whose value is `''`."""
for ds in data:
ds['exchanges'] = [{k: v for k, v in exc.items() if v is not ''}
for exc in ds['exchanges']]
return data
Let's see what our data looks like so far:
clean_blank_fields(choose_name(process_sample_excel_data(ex.data)))
We can drop production exchanges where the amount is one, as this is the default.
def drop_production_exchanges_amount_one(data):
"""Delete categories if not given in Excel file"""
for ds in data:
ds['exchanges'] = [exc for exc in ds['exchanges']
if not (exc['type'] == 'production' and exc['amount'] == 1)]
return data
We can also delete categories if none are present:
def delete_missing_categories(data):
"""Drop production exchanges where the amount is 1"""
for ds in data:
for exc in ds['exchanges']:
if exc['categories'] in {('', ''), ('-', '-')}:
del exc['categories']
return data
Next, we add some already created strategies to:
from bw2io.strategies import (
normalize_units,
remove_zero_amount_coproducts,
normalize_biosphere_categories,
normalize_biosphere_names,
strip_biosphere_exc_locations,
)
Let's add and apply our strategies:
ex.strategies = [
process_sample_excel_data,
choose_name,
clean_blank_fields,
drop_production_exchanges_amount_one,
delete_missing_categories,
normalize_units,
remove_zero_amount_coproducts,
normalize_biosphere_categories,
normalize_biosphere_names,
strip_biosphere_exc_locations,
]
ex.apply_strategies()
ex.statistics()
ex.match_database("ecoinvent 3.1 cutoff")
ex.statistics()
ex.match_database(config.biosphere, kind="biosphere")
ex.statistics()
for exc in ex.unlinked:
print(exc)
And this is quite reasonable - the three unlinked inputs are not found in Ecoinvent, and therefore shouldn't be linked.
The next step would be one of manually link these missing exchanges, writing a data migration to change the input values, and then re-linking to ecoinvent, or creating the unlinked inputs as new processes.
Currently, we can write the dataset in its current state with ex.write_unlinked
; after linking, we would write the database with ex.write_database
.