#!/usr/bin/env python # coding: utf-8 # In[ ]: #%% """File 06unifMixtureIntegral.py :author: Michel Bierlaire, EPFL :date: Sat Sep 7 20:45:18 2019 Example of a mixture of logit models, using numerical integration. The mixing distribution is uniform. Three alternatives: Train, Car and Swissmetro SP data """ import pandas as pd import biogeme.database as db import biogeme.biogeme as bio from biogeme import models import biogeme.messaging as msg from biogeme.expressions import ( Beta, DefineVariable, Integrate, RandomVariable, exp, log, ) # Read the data df = pd.read_csv('swissmetro.dat', sep='\t') database = db.Database('swissmetro', df) # The Pandas data structure is available as database.data. Use all the # Pandas functions to invesigate the database # print(database.data.describe()) # The following statement allows you to use the names of the variable # as Python variable. globals().update(database.variables) # Removing some observations can be done directly using pandas. # remove = (((database.data.PURPOSE != 1) & # (database.data.PURPOSE != 3)) | # (database.data.CHOICE == 0)) # database.data.drop(database.data[remove].index,inplace=True) # Here we use the "biogeme" way for backward compatibility exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0 database.remove(exclude) # Parameters to be estimated ASC_CAR = Beta('ASC_CAR', 0, None, None, 0) ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, 0) ASC_SM = Beta('ASC_SM', 0, None, None, 1) B_COST = Beta('B_COST', 0, None, None, 0) # Define a random parameter, normally distributed, designed to be used # for numerical integration B_TIME = Beta('B_TIME', 0, None, None, 0) B_TIME_S = Beta('B_TIME_S', 1, None, None, 0) omega = RandomVariable('omega') a = -1 b = 1 x = a + (b - a) / (1 + exp(-omega)) dx = (b - a) * exp(-omega) * (1 + exp(-omega)) ** (-2) B_TIME_RND = B_TIME + B_TIME_S * x # Definition of new variables SM_COST = SM_CO * (GA == 0) TRAIN_COST = TRAIN_CO * (GA == 0) # Definition of new variables: adding columns to the database CAR_AV_SP = DefineVariable('CAR_AV_SP', CAR_AV * (SP != 0), database) TRAIN_AV_SP = DefineVariable('TRAIN_AV_SP', TRAIN_AV * (SP != 0), database) TRAIN_TT_SCALED = DefineVariable('TRAIN_TT_SCALED', TRAIN_TT / 100.0, database) TRAIN_COST_SCALED = DefineVariable( 'TRAIN_COST_SCALED', TRAIN_COST / 100, database ) SM_TT_SCALED = DefineVariable('SM_TT_SCALED', SM_TT / 100.0, database) SM_COST_SCALED = DefineVariable('SM_COST_SCALED', SM_COST / 100, database) CAR_TT_SCALED = DefineVariable('CAR_TT_SCALED', CAR_TT / 100, database) CAR_CO_SCALED = DefineVariable('CAR_CO_SCALED', CAR_CO / 100, database) # Definition of the utility functions V1 = ASC_TRAIN + B_TIME_RND * TRAIN_TT_SCALED + B_COST * TRAIN_COST_SCALED V2 = ASC_SM + B_TIME_RND * SM_TT_SCALED + B_COST * SM_COST_SCALED V3 = ASC_CAR + B_TIME_RND * CAR_TT_SCALED + B_COST * CAR_CO_SCALED # Associate utility functions with the numbering of alternatives V = {1: V1, 2: V2, 3: V3} # Associate the availability conditions with the alternatives av = {1: TRAIN_AV_SP, 2: SM_AV, 3: CAR_AV_SP} # Conditional to omega, we have a logit model (called the kernel) condprob = models.logit(V, av, CHOICE) # We integrate over omega using numerical integration logprob = log(Integrate(condprob * dx / (b - a), 'omega')) # Define level of verbosity logger = msg.bioMessage() # logger.setSilent() # logger.setWarning() logger.setGeneral() # logger.setDetailed() # Create the Biogeme object biogeme = bio.BIOGEME(database, logprob) biogeme.modelName = '06unifMixtureIntegral' # Estimate the parameters results = biogeme.estimate() pandasResults = results.getEstimatedParameters() print(pandasResults)