#!/usr/bin/env python
# coding: utf-8

# In[ ]:

#%%
"""File 01nestedEstimation.py

:author: Michel Bierlaire, EPFL
:date: Wed Sep 11 09:59:55 2019

Estimation of a nested logit model, that will be used for simulation.
Three alternatives: public transportation, car and slow modes.
RP data.
"""

import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme import models
from biogeme.expressions import Beta

# Read the data (tab-separated file expected in the working directory)
df = pd.read_csv('optima.dat', sep='\t')
database = db.Database('optima', df)

# The following statement allows you to use the names of the variables
# as Python variables. Every data column referenced below (Choice,
# TimePT, TimeCar, ...) is brought into scope by this call.
globals().update(database.variables)

# Exclude observations such that the chosen alternative is -1
database.remove(Choice == -1.0)

# List of parameters to be estimated.
# Arguments of Beta: name, initial value, lower bound, upper bound, status.
# ASC_PT is the only one with status 1; all the others have status 0.
ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)
ASC_PT = Beta('ASC_PT', 0, None, None, 1)
ASC_SM = Beta('ASC_SM', 0, None, None, 0)
BETA_TIME_FULLTIME = Beta('BETA_TIME_FULLTIME', 0, None, None, 0)
BETA_TIME_OTHER = Beta('BETA_TIME_OTHER', 0, None, None, 0)
BETA_DIST_MALE = Beta('BETA_DIST_MALE', 0, None, None, 0)
BETA_DIST_FEMALE = Beta('BETA_DIST_FEMALE', 0, None, None, 0)
BETA_DIST_UNREPORTED = Beta('BETA_DIST_UNREPORTED', 0, None, None, 0)
BETA_COST = Beta('BETA_COST', 0, None, None, 0)

# Definition of variables:
# For numerical reasons, it is good practice to scale the data so
# that the values of the parameters are around 1.0.
TimePT_scaled = TimePT / 200
TimeCar_scaled = TimeCar / 200
MarginalCostPT_scaled = MarginalCostPT / 10
CostCarCHF_scaled = CostCarCHF / 10
distance_km_scaled = distance_km / 5

# Indicator expressions used to make coefficients segment-specific
male = Gender == 1
female = Gender == 2
unreportedGender = Gender == -1

fulltime = OccupStat == 1
notfulltime = OccupStat != 1

# Definition of utility functions:
V_PT = (
    ASC_PT
    + BETA_TIME_FULLTIME * TimePT_scaled * fulltime
    + BETA_TIME_OTHER * TimePT_scaled * notfulltime
    + BETA_COST * MarginalCostPT_scaled
)
V_CAR = (
    ASC_CAR
    + BETA_TIME_FULLTIME * TimeCar_scaled * fulltime
    + BETA_TIME_OTHER * TimeCar_scaled * notfulltime
    + BETA_COST * CostCarCHF_scaled
)
V_SM = (
    ASC_SM
    + BETA_DIST_MALE * distance_km_scaled * male
    + BETA_DIST_FEMALE * distance_km_scaled * female
    + BETA_DIST_UNREPORTED * distance_km_scaled * unreportedGender
)

# Associate utility functions with the numbering of alternatives
V = {0: V_PT, 1: V_CAR, 2: V_SM}

# Definition of the nests. Each nest is a tuple:
# 1: nest parameter
# 2: list of alternatives
# MU_NOCAR starts at 1.0 with a lower bound of 1.0 and no upper bound.
MU_NOCAR = Beta('MU_NOCAR', 1.0, 1.0, None, 0)

CAR_NEST = 1.0, [1]
NO_CAR_NEST = MU_NOCAR, [0, 2]
nests = CAR_NEST, NO_CAR_NEST

# The choice model is a nested logit. None is passed as the availability
# argument, so no availability conditions are supplied here.
logprob = models.lognested(V, None, nests, Choice)

# Create the Biogeme object
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = '01nestedEstimation'

# Estimate the parameters. Calculate also the standard errors using
# bootstrapping (100 draws).
results = biogeme.estimate(bootstrap=100)

# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
print(pandasResults)