In [1]:
from brightway2 import *
Warning: bad escape \s
In [2]:
projects.set_current("CMLCA import")
In [3]:
bw2setup()

db = Database("biosphere3")
db.make_searchable()
Biosphere database already present!!! No setup is needed
This database is already searchable
In [4]:
if "ecoinvent 2.2" not in databases:
    ei = SingleOutputEcospold1Importer(
        "/Users/cmutel/Documents/LCA Documents/Ecoinvent/2.2/processes", 
        "ecoinvent 2.2"
    )
    ei.apply_strategies()
    ei.write_database()
In [5]:
fp = ("Life cycle inventory of the production of rare earths and "
      "the subsequent production of NdFeB rare earth permanent "
      "magnets - supporting tables.xlsx")

ei = ExcelImporter(fp, "Rare earth metals")
Extracted 15 worksheets in 0.04 seconds
In [6]:
def reformat_sheet(sheet, ds, database_name):
    if ds[0][0][:10] != "Process = ":
        print("Ignoring worksheet {} - not valid inventory".format(sheet))
        return

    metadata = [
        # ("Process = ", 'name'),
        ('Description = ', 'description'),
        ('ID = ', 'cmlca id'),
        ('Stage = ', 'stage'),
        ('Region = ', 'location'),
        ('Author = ', 'author'),
        ('Date = ', 'date'),
    ]
    biosphere_flows = {
        "Environmental resources", 
        "Environmental emissions"
    }

    data, obj = [], None

    for index in range(len(ds)):
        if ds[index][0][:10] == "Process = ":
            if obj:
                data.append(obj)
            obj = {
                'sheet': sheet,
                'database': database_name,
                'exchanges': [],
                'name': ds[index][0].replace("Process = ", '')
            }
        elif any(ds[index][0][:len(x)] == x for x, y in metadata):
            for label, value in metadata:
                if ds[index][0][:len(label)] == label:
                    obj[value] = ds[index][0].replace(label, '')
        else:
            row = ds[index]
            if row[0] == 'Label' or not row[0]:
                continue
            elif row[0] in biosphere_flows:
                kind = 'biosphere'
                continue
            elif row[0] == "Economic inflows":
                kind = 'technosphere'
                continue
            elif row[0] == "Economic outflows":
                kind = 'production'
                continue
            elif not all(row):
                continue
            obj['exchanges'].append({
                'code': row[0],
                'name': row[1],
                'amount': row[2],
                'unit': row[3],
                'uncertainty': row[4],
                'type': kind,
            })
    if obj:
        data.append(obj)
    return data


def cmlca_reformat(data, database_name):
    """Change from extracted Excel cells to standard BW2 format"""
    return [
        x
        for row in data
        for x in reformat_sheet(sheet=row[0], ds=row[1], database_name=database_name) 
    ]
In [7]:
def reformat_biosphere_names(data):
    """Change from:

        "name": "Water, unspecified natural origin[resource_in water]" 

    to: 

        "name": "Water, unspecified natural origin",
        "categories": ("resource", "in water")

    """
    for ds in data:
        for exc in  ds.get('exchanges', []):
            if exc['type'] == "biosphere":
                # Use simple text processing instead of overly clever regex
                split_string = exc['name'].split("[")
                exc['name'] = "[".join(split_string[:-1])
                exc['categories'] = tuple(
                    split_string[-1].replace("]", "").split("_")
                )
    return data
In [8]:
def reformat_technosphere_names(data):
    """Change from:

        "name": "electricity, medium voltage, at grid[CN]" 

    to: 

        "name": "electricity, medium voltage, at grid",
        "location": "CN"

    """
    for ds in data:
        for exc in  ds.get('exchanges', []):
            if exc['type'] == "technosphere" and ("[" in exc['name'] and "]" in exc['name']):
                # Use simple text processing instead of overly clever regex
                split_string = exc['name'].split("[")
                exc['name'] = "[".join(split_string[:-1])
                exc['location'] = split_string[-1].replace("]", "")
                
    return data
In [9]:
def set_production_code_and_amount(data):
    """Set dataset production amount and code from single production exchange"""
    for ds in data:
        products = [x for x in ds.get('exchanges', []) if x.get('type') == 'production']
        if len(products) == 1:
            product = products[0]
            ds['code'] = product['code']
            ds['production amount'] = product['amount']
        else:
            print("Can't process multioutput process: {}: {}".format(ds['sheet'], ds['name']))
    return data
In [10]:
cmlca_migration = {
    'fields': ['name', 'categories', 'type'],
    'data': [
        (
            ('Water', ('resources',), 'biosphere'),
            {
                'categories': ('water',)
            }
        ),
        (
            ('Carbon dioxide', ('air',), 'biosphere'),
            {
                'name': 'Carbon dioxide, fossil',
            }
        ),
        (
            ('Carbon monoxide', ('air',), 'biosphere'),
            {
                'name': 'Carbon monoxide, fossil',
            }
        ),
        (
            ('2,3,7,8-tetrachlorodibenzo-p-dioxin', ('air',), 'biosphere'),
            {
                'name': 'Dioxins, measured as 2,3,7,8-tetrachlorodibenzo-p-dioxin',
            }
        ),
        (
            ('Particulates', ('air',), 'biosphere'),
            {
                'name': 'Particulates, > 2.5 um, and < 10um',
            }
        ),
        (
            ('Water, unspecified natural origin', ['natural resource', 'in water'], 'biosphere'),
            {
                'unit': 'cubic meter',
            }
        ),
    ]
}

cmlca_migration_units = {
    'fields': ['unit'],
    'data': [
        (
            ('m2/m2',),
            {'unit': "square meter"}
        ),
        (
            ('kg/m2',),
            {'unit': "kilogram"}
        ),
        (
            ('m3/m2',),
            {'unit': "cubic meter"}
        ),
        (
            ('kWh/m2',),
            {'unit': "kilowatt hour"}
        ),
    ]
}

if "cmlca" not in migrations:
    Migration("cmlca").write(
        cmlca_migration, 
        "Reformat biosphere flows to ecoinvent standards"
    )
    Migration("cmlca units").write(
        cmlca_migration_units, 
        "Reformat electroplating flows to correct functional unit"
    )    
In [11]:
from bw2io.strategies import *
In [12]:
import functools

strategies = [
    functools.partial(cmlca_reformat, database_name=ei.db_name),
    reformat_biosphere_names,
    reformat_technosphere_names,
    set_production_code_and_amount,
    drop_unspecified_subcategories,
    normalize_biosphere_categories,
    normalize_biosphere_names,
    normalize_units,
    functools.partial(migrate_exchanges, migration="cmlca"),
    functools.partial(migrate_exchanges, migration="cmlca units"),
    functools.partial(migrate_exchanges, migration="biosphere-2-3-names"),
    link_technosphere_by_activity_hash,
]

ei.apply_strategies(strategies)
Applying strategy: cmlca_reformat
Applying strategy: reformat_biosphere_names
Applying strategy: reformat_technosphere_names
Applying strategy: set_production_code_and_amount
Can't process multioutput process: Beneficiation: [P4098] beneficiation of REE ore, baseline
Can't process multioutput process: Acid roasting: [P4097] REE acid roasting, baseline
Can't process multioutput process: Solvent extraction: [P4096] REO solvent extraction, baseline
Can't process multioutput process: Grinding&slicing: [P4093] grinding and slicing of NdFeB, primary, baseline
Applying strategy: drop_unspecified_subcategories
Applying strategy: normalize_biosphere_categories
Applying strategy: normalize_biosphere_names
Applying strategy: normalize_units
Applying strategy: migrate_exchanges
Applying strategy: migrate_exchanges
Applying strategy: migrate_exchanges
Applying strategy: link_technosphere_by_activity_hash
Couldn't apply strategy link_technosphere_by_activity_hash:
	Not all datasets in database to be linked have ``database`` or ``code`` attributes
Applied 12 strategies in 0.02 seconds
In [13]:
ei.match_database("biosphere3", kind="biosphere")
ei.match_database("ecoinvent 2.2", fields=['name', 'location', 'unit'])
Applying strategy: link_iterable_by_fields
Applying strategy: link_iterable_by_fields
In [17]:
ei.statistics()
16 datasets
229 exchanges
33 unlinked exchanges
  Type production: 18 unique unlinked exchanges
  Type technosphere: 15 unique unlinked exchanges
Out[17]:
(16, 229, 33)
In [15]:
ei.write_excel()
Wrote matching file to:
/Users/cmutel/Library/Application Support/Brightway3/CMLCA-import.ec6c7b98f76633079e98132f49507f1e/export/db-matching-Rare-earth-metals.xlsx