In [1]:
from brightway2 import *
from stats_arrays import LognormalUncertainty
from time import time
import math
import numpy as np
In [2]:
start = time()
In [3]:
method = (u'IPCC 2013', u'climate change', u'GWP 100a')
ei_original = Database("ecoinvent 2.2")

Create a copy of this database. We will modify the copy and leave the original untouched.

In [4]:
if "ecoinvent 2.2 average" in databases:
    del databases["ecoinvent 2.2 average"]

db = ei_original.copy("ecoinvent 2.2 average")

Load the data to modify.

In [5]:
data = db.load()

Loop over the datasets and exchanges, replacing the median value with the average for every exchange with lognormal uncertainty.

The lognormal distribution is defined by two parameters, $\mu$ and $\sigma$. In Brightway2, these parameters map to the keys loc and scale (see the stats_arrays documentation).

The default value used in ecoinvent is the median, which is:

$$ e^{\mu} $$

But we want to know how LCIA results will change if we use the average, which is:

$$ e^{\mu + \sigma^{2} / 2} $$
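Note that the ratio of the average to the median is $e^{\sigma^{2}/2}$, which is always at least one, so switching to the average can only increase each amount's magnitude. Both identities are easy to verify numerically with scipy.stats; this is just a sanity-check sketch, and the parameter values are made up for illustration:

from scipy.stats import lognorm

mu, sigma = 0.5, 0.3  # illustrative values only
dist = lognorm(s=sigma, scale=math.exp(mu))

print dist.median(), math.exp(mu)                  # both equal exp(mu)
print dist.mean(), math.exp(mu + sigma ** 2 / 2)   # both equal exp(mu + sigma^2 / 2)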
In [6]:
def get_median(exc):
    # Median of a lognormal distribution: exp(mu). Restore the sign, since
    # mu and sigma describe the magnitude only.
    return math.exp(exc["loc"]) * (-1 if exc.get("negative") else 1)

def get_average(exc):
    # Average (mean) of a lognormal distribution: exp(mu + sigma^2 / 2)
    return math.exp(exc["loc"] + exc["scale"] ** 2 / 2) * (-1 if exc.get("negative") else 1)

First, we make sure that the amount value really is the median.

In [7]:
for key, ds in data.items():
    for exc in ds.get("exchanges", []):
        if exc["uncertainty type"] == LognormalUncertainty.id:
            assert np.allclose(exc["amount"], get_median(exc))

Switch median values to average:

In [8]:
for key, ds in data.items():
    for exc in ds.get("exchanges", []):
        if exc["uncertainty type"] == LognormalUncertainty.id:
            exc["amount"] = get_average(exc)

Write the new data to our database, and process it so that the modified amounts are used in LCA calculations.

In [9]:
db.write(data)
db.process()

Make sure that the processes are in the same order, so that the two sets of scores can be compared pairwise (ratios, correlation).

In [10]:
median_keys = sorted(Database("ecoinvent 2.2").load().keys())
average_keys = sorted(db.load().keys())
In [11]:
for x, y in zip(median_keys, average_keys)[:20]:
    print x[1], y[1]
0015f054356293398bca397fe9261b05 0015f054356293398bca397fe9261b05
001bc7601e327c14a6d2316fdb739012 001bc7601e327c14a6d2316fdb739012
0042a5e68766610f9ff87c8bdcc4527a 0042a5e68766610f9ff87c8bdcc4527a
004ee114924dcbc0733e20afa6a54147 004ee114924dcbc0733e20afa6a54147
0060ae00d9a29392009b1595722883a9 0060ae00d9a29392009b1595722883a9
00623899611a18f8616b6d2d9de7adfb 00623899611a18f8616b6d2d9de7adfb
0070b1bfab4db075dac3dc9127bfa99c 0070b1bfab4db075dac3dc9127bfa99c
007e45fdca1a46593e78fee91e91e192 007e45fdca1a46593e78fee91e91e192
007fb874be8334806a5fcd21f25bf841 007fb874be8334806a5fcd21f25bf841
0085e6f833c2ec2f1c315b1614a154ae 0085e6f833c2ec2f1c315b1614a154ae
009f24041e6117e7cd0983932f7de0cc 009f24041e6117e7cd0983932f7de0cc
00a1a85a022e137e5a458abbf2db9398 00a1a85a022e137e5a458abbf2db9398
00c0bb57dcd40ee0720631d3844a6308 00c0bb57dcd40ee0720631d3844a6308
00f9ac8683fc60b9d07a3b810fffe201 00f9ac8683fc60b9d07a3b810fffe201
01035f063a58fbe473533a5a749bbfd6 01035f063a58fbe473533a5a749bbfd6
0108fe5503fe8a71162c9eebb0a05056 0108fe5503fe8a71162c9eebb0a05056
01367561559ba5028445757823950466 01367561559ba5028445757823950466
0143c05ffb77084c2fd47c20d79d9c95 0143c05ffb77084c2fd47c20d79d9c95
015100cffb46ae40fa09e26d1f488e36 015100cffb46ae40fa09e26d1f488e36
0174d1df4413285eb3f4b3375a8d376c 0174d1df4413285eb3f4b3375a8d376c
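Rather than eyeballing the printout, we can also check every pair programmatically. Each key is a (database name, code) tuple; the database names differ by construction, so compare the codes:

assert len(median_keys) == len(average_keys)
assert all(x[1] == y[1] for x, y in zip(median_keys, average_keys))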
In [12]:
results = np.zeros((len(median_keys), 2))

Calculate LCA scores for all processes in the original ecoinvent 2.2 database (median values).

In [13]:
lca = LCA({median_keys[0]: 1}, method)
lca.lci(factorize=True)
lca.lcia()

for index, key in enumerate(median_keys):
    lca.redo_lcia({key: 1})
    results[index, 0] = lca.score

Now calculate LCA scores for all processes in the modified database (average values).

In [14]:
lca = LCA({average_keys[0]: 1}, method)
lca.lci(factorize=True)
lca.lcia()

for index, key in enumerate(average_keys):
    lca.redo_lcia({key: 1})
    results[index, 1] = lca.score

Exclude processes whose LCA score is zero, since the ratio would be undefined.

In [15]:
mask = results[:, 0] == 0
results = results[~mask, :]

print "Excluded %s values because LCA score was zero" % mask.sum()
Excluded 112 values because LCA score was zero

We want to calculate the ratio of LCIA scores, average / median.

In [16]:
ratio = results[:, 1] / results[:, 0]
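Because the two columns of results are aligned by process, the correlation mentioned earlier is a one-liner (a sketch; output not shown here):

print np.corrcoef(results[:, 0], results[:, 1])[0, 1]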
In [17]:
np.histogram(ratio)
Out[17]:
(array([   2,    0,    0,    0,    0,    0,    0,    1, 3969,    3]),
 array([-90.17952953, -79.30564546, -68.4317614 , -57.55787734,
        -46.68399328, -35.81010921, -24.93622515, -14.06234109,
         -3.18845703,   7.68542703,  18.5593111 ]))

The histogram shows a handful of extreme ratios, including negative ones where the score changed sign. Exclude these outliers.

In [18]:
mask_low = ratio < 0.8
mask_high = ratio > 1.4
print "Excluding %s values too low and %s values too high" % (mask_low.sum(), mask_high.sum())
ratio_masked = ratio[~(mask_low | mask_high)]
Excluding 37 values too low and 85 values too high

Get the average and median (he he) of the ratios, for both unfiltered and filtered data.

In [19]:
print "Unfiltered data:", np.average(ratio),        np.median(ratio)
print "Filtered data:",   np.average(ratio_masked), np.median(ratio_masked)
Unfiltered data: 1.04492640659 1.05317671682
Filtered data: 1.07148015049 1.05236198161
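Plot the distribution of the filtered ratios, overlaying a kernel density estimate on the histogram.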
In [20]:
%matplotlib inline
In [21]:
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import gaussian_kde
In [22]:
kde = gaussian_kde(ratio_masked)
In [23]:
d = sns.distplot(ratio_masked, kde=False)
plt.xlim(0.8, 1.4)

xs = np.linspace(0.8, 1.4, 500)
plt.plot(
    xs,
    kde.evaluate(xs),
    lw=3,
    color=(0.2980392156862745, 0.4470588235294118, 0.6901960784313725),
    alpha=0.8
)
plt.xlabel("Ratio of LCIA scores, average / median (2013 IPCC 100-yr)")
plt.ylabel("Probability")

plt.savefig("ratio-histogram.png", dpi=600)
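Finally, compare the two sets of scores directly on log-log axes (absolute values, since some scores are negative).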
In [24]:
sns.jointplot(
    np.log(np.abs(results[:, 0])),
    np.log(np.abs(results[:, 1])),
)
Out[24]:
<seaborn.axisgrid.JointGrid at 0x11c59b3d0>
In [25]:
print "Elapsed calculation time:", time() - start
Elapsed calculation time: 72.3986759186