#%%
"""File 06unifMixtureIntegral.py
:author: Michel Bierlaire, EPFL
:date: Sat Sep 7 20:45:18 2019
Example of a mixture of logit models, using numerical integration.
The mixing distribution is uniform.
Three alternatives: Train, Car and Swissmetro
SP data
"""
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme import models
import biogeme.messaging as msg
from biogeme.expressions import (
Beta,
DefineVariable,
Integrate,
RandomVariable,
exp,
log,
)
# Read the data
df = pd.read_csv('swissmetro.dat', sep='\t')
database = db.Database('swissmetro', df)
# The Pandas data structure is available as database.data. Use all the
# Pandas functions to invesigate the database
# print(database.data.describe())
# The following statement allows you to use the names of the variable
# as Python variable.
globals().update(database.variables)
# Removing some observations can be done directly using pandas.
# remove = (((database.data.PURPOSE != 1) &
# (database.data.PURPOSE != 3)) |
# (database.data.CHOICE == 0))
# database.data.drop(database.data[remove].index,inplace=True)
# Here we use the "biogeme" way for backward compatibility
exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0
database.remove(exclude)
# Parameters to be estimated
ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, 0)
ASC_SM = Beta('ASC_SM', 0, None, None, 1)
B_COST = Beta('B_COST', 0, None, None, 0)
# Define a random parameter, normally distributed, designed to be used
# for numerical integration
B_TIME = Beta('B_TIME', 0, None, None, 0)
B_TIME_S = Beta('B_TIME_S', 1, None, None, 0)
omega = RandomVariable('omega')
a = -1
b = 1
x = a + (b - a) / (1 + exp(-omega))
dx = (b - a) * exp(-omega) * (1 + exp(-omega)) ** (-2)
B_TIME_RND = B_TIME + B_TIME_S * x
# Definition of new variables
SM_COST = SM_CO * (GA == 0)
TRAIN_COST = TRAIN_CO * (GA == 0)
# Definition of new variables: adding columns to the database
CAR_AV_SP = DefineVariable('CAR_AV_SP', CAR_AV * (SP != 0), database)
TRAIN_AV_SP = DefineVariable('TRAIN_AV_SP', TRAIN_AV * (SP != 0), database)
TRAIN_TT_SCALED = DefineVariable('TRAIN_TT_SCALED', TRAIN_TT / 100.0, database)
TRAIN_COST_SCALED = DefineVariable(
'TRAIN_COST_SCALED', TRAIN_COST / 100, database
)
SM_TT_SCALED = DefineVariable('SM_TT_SCALED', SM_TT / 100.0, database)
SM_COST_SCALED = DefineVariable('SM_COST_SCALED', SM_COST / 100, database)
CAR_TT_SCALED = DefineVariable('CAR_TT_SCALED', CAR_TT / 100, database)
CAR_CO_SCALED = DefineVariable('CAR_CO_SCALED', CAR_CO / 100, database)
# Definition of the utility functions
V1 = ASC_TRAIN + B_TIME_RND * TRAIN_TT_SCALED + B_COST * TRAIN_COST_SCALED
V2 = ASC_SM + B_TIME_RND * SM_TT_SCALED + B_COST * SM_COST_SCALED
V3 = ASC_CAR + B_TIME_RND * CAR_TT_SCALED + B_COST * CAR_CO_SCALED
# Associate utility functions with the numbering of alternatives
V = {1: V1, 2: V2, 3: V3}
# Associate the availability conditions with the alternatives
av = {1: TRAIN_AV_SP, 2: SM_AV, 3: CAR_AV_SP}
# Conditional to omega, we have a logit model (called the kernel)
condprob = models.logit(V, av, CHOICE)
# We integrate over omega using numerical integration
logprob = log(Integrate(condprob * dx / (b - a), 'omega'))
# Define level of verbosity
logger = msg.bioMessage()
# logger.setSilent()
# logger.setWarning()
logger.setGeneral()
# logger.setDetailed()
# Create the Biogeme object
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = '06unifMixtureIntegral'
# Estimate the parameters
results = biogeme.estimate()
pandasResults = results.getEstimatedParameters()
print(pandasResults)