#!/usr/bin/env python # coding: utf-8 # After comparing Keeling's Matlab code and output, and using multiple print statements throughout my code, I am still not sure why the size of the nearest infected farm is not significant. I found a couple of bugs, but they did not change the results. I also remembered that the reason I had previously decided not to use Keeling's kernel and use a Cauchy kernel was because Keeling's kernel makes the disease progress very slowly. So I have used the Cauchy kernel in the code below. The changes that I have made result in output that is consistent with Keeling's code, however I have noticed that in this new version the number of new infected is dramatically lower than it used to be. # In[1]: import numpy as np from random import randint # DONT USE CAPITALIZED NAMES FOR VARIABLES; RESERVE THESE FOR CLASSES ONLY # ALSO, MORE DESCRIPTIVE VARIABLE NAMES WOULD HELP -- SIZE OF WHAT, FOR EXAMPLE Size = 20 N = 75 np.random.seed(53080) x = Size*np.random.rand(N) np.random.seed(23003) y = Size*np.random.rand(N) #np.random.seed(10) #Cows = np.array([randint(25,250) for p in range(N)]) # YOU ARE BETTER OFF USING NUMPY'S RANDOM NUMBER GENERATOR, SINCE YOU HAVE IMPORTED IT ALREADY # IS THERE A REASON YOU ARE USING THE BUILTIN ONE INSTEAD? # THEN YOU CAN SIMPLY: np.random.randint(25, 51, size=15) Cows = np.array([randint(25,51) for p in range(15)]+[randint(51,76) for p in range(30)]+[randint(76,95) for p in range(20)]+[randint(95,250) for p in range(10)]) # AGAIN, USING NUMPY YOU CAN CONCATENATE THESE USING np.r_ INSTEAD OF SUMMING LISTS #np.random.seed(11) #Sheep = np.array([randint(25,250) for p in range(N)]) Sheep = np.array([randint(25,51) for p in range(15)]+[randint(51,76) for p in range(30)]+[randint(76,95) for p in range(20)]+[randint(95,250) for p in range(10)]) # In[10]: np.array([4,5,7,2,10,11]) & np.arange(6) # In[7]: #Calculates which grid square a particular location is in (turn a 2-d coordinate into a scalar) def WhichGrid(x,y,XRange,YRange,XNum,YNum): #Essentially: floor(Unif[0,1)griddim)griddim+floor(Unif[0,1)griddim)+1 #Returns a number from 1 to griddim^2 return(np.floor(x*(XNum/XRange))*YNum+np.floor(y*(YNum/YRange))+1) # In[8]: def Kernel(dist_squared): dist_squared = np.asarray(dist_squared) # WHY NOT JUST is_scalar = dist_squared.ndim == 0 is_scalar = False if dist_squared.ndim > 0 else True # NOT CLEAR WHAT YOU ARE DOING BELOW dist_squared.shape = (1,)*(1-dist_squared.ndim) + dist_squared.shape K = 1 / (pi * (1 + dist_squared**2)) K[(dist_squared < 0.0138)] = 0.3093 K[(dist_squared > 60*60)] = 0 return(K if not is_scalar else K[0]) # In[9]: from math import pi def Iterate(Status, x, y, Suscept, Transmiss, grid, first_in_grid, last_in_grid, Num, MaxRate): Event = 0*Status INF = np.where(Status>5)[0] NI = INF.size # Note reported farms still infectious IGrids = grid[INF]-1 for ii in range(NI): INFi = INF[ii] # THERE IS RARELY ANY NEED TO USE np.multiply. JUST USE -Transmiss[INFi]*Num trans = np.multiply(-Transmiss[INFi],Num) #transmissibility of infected farm to all other grid squares maxr = MaxRate[IGrids[ii],:] #max number of animals to be infected in infected grid square # Elementwise multiplication rate = np.multiply(trans, maxr) #max number of animals to be infected in each grid square based on infected grid square MaxProb = 1 - np.exp(rate) #Max probability that infected farm infected noninfected farm rng = np.random.rand(len(MaxProb)) m = np.where((MaxProb - rng)>0)[0] #these grid squares need further consideration for n in range(len(m)): s = 1 M = m[n] PAB = 1 - np.exp(-Transmiss[INFi]*MaxRate[IGrids[ii],M]) #Max probability that infected farm infects noninfected farms under consideration if (PAB == 1): # Calculate the infection probability for each farm in the susceptible grid leng = last_in_grid[M]-first_in_grid[M]+1 R = np.random.rand(leng) for j in range(leng): ind1 = first_in_grid[M]+j-1 Q = 1 - np.exp(-Transmiss[INFi]*Suscept[ind1]*Kernel((x[INFi]-x[ind1])**2+(y[INFi]-y[ind1])**2)) if ((R[j] < Q) & (Status[ind1] == 0)): Event[ind1] = 1 else: R = np.random.rand(Num[M]) # Loop through all susceptible farms in the grids where an infection event occurred. for j in range(Num[M]): P = 1 - s*(1 - PAB)**(Num[M] - j) if (R[j] < (PAB / P)): s = 0 ind1=first_in_grid[M]+j-1 # HAVE YOU TRIED ANALYZING JUST THIS FUNCTION FOR SENSITIVITY TO # FARM SIZE? Q=1-np.exp(-Transmiss[INFi]*Suscept[ind1]*Kernel((x[INFi]-x[ind1])**2+(y[INFi]-y[ind1])**2)) if ((R[j]< Q/P) & (Status[ind1] == 0)): Event[ind1] = 1 # Evolve the infection process of those farms which have been exposed and already infectious ones. Status[Status > 0] += 1 Status = Status + Event #m=np.where(Status==13); # Initiate Ring Culling Around Reported Farm #for i in range(len(m)): # Status[m[i]]=-1; return {'Status':Status,'NI':NI} # THE FUNCTION BELOW IS CUMBERSOME. BREAK IT DOWN INTO FUNCTIONS, AND BE SURE TO WRITE TESTS TO ENSURE THAT EACH PIECE IS OPERATING AS EXPECTED # In[10]: def Outbreaks(Size,N,Y0,farms,end,end2,x,y,Cows,Sheep,Maxtime=1000): #This is an attempt of converting the Matlab Program 7.6 Code into Python # LIBRARY IMPORT SHOULD NOT OCCUR INSIDE OF FUNCTIONS import numpy as np import pandas as pd from math import pi Status = np.array([0]*N) #Initial Status of each farm init_ind = np.random.randint(0,N) for i in range(Y0): Status[init_ind] = 6 #one farm is initially infected #Cows are 10.5 times more susceptible to disease than sheep Suscept = Sheep+10.5*Cows Transmiss = 5.1e-7*Sheep + 7.7e-7*Cows #Set up the grid grid = WhichGrid(x,y,Size,Size,10.0,10.0) tmp = sorted(grid) #Sort grid values # DO WE REALLY NEED ALL OF THE BELOW? IF SO, BETTER TO INITIALIZE A PANDAS # DATA FRAME THAN ALL THESE LISTS #i = np.argsort(grid) #get indexed values after sort i = [i[0] for i in sorted(enumerate(grid), key=lambda x:x[1])] x = x[i] y = y[i] Status = Status[i] grid = grid[i] Transmiss = Transmiss[i] Suscept = Suscept[i] Cows = Cows[i] Sheep = Sheep[i] Xgrid = [] Ygrid = [] Num = [] first_in_grid = [] last_in_grid = [] Max_Sus_grid = [] index_inf = np.where(Status==6)[0].astype(int) m2 = np.array(np.where(grid==1)) for i in range(1,int(max(grid))+1): #turn the grid square number into an x-coordinate and y-coordinate (should not exceed XNum) Xgrid.append(np.floor((i-1)/10)) Ygrid.append((i-1)%10) m = np.array(np.where(grid==i)) Num.append(m.shape[1]) if Num[i-1] > 0: first_in_grid.append(m.min()) #Add the "+1" here so the indicies match those in the Keeling code last_in_grid.append(m.max()) Max_Sus_grid.append(Suscept[m].max()) else: first_in_grid.append(0) last_in_grid.append(-1) Max_Sus_grid.append(0) #Work out grid to maximum grid transmission probabilities from numpy import ndarray MaxRate = ndarray((max(grid),max(grid))) #Determine maximum number of animals to be infected in each grid square # YOUR INDENTATION IS MESSED UP HERE (only 3 spaces) WHICH SUGGESTS THIS NEVER GETS CALLED for i in range (1,int(max(grid))): for j in range(1,int(max(grid))): if ((i-1)==(j-1)) | (Num[i-1]==0) | (Num[j-1] == 0): MaxRate[i-1,j-1] = np.inf else: Dist2 = (Size*max([0,(abs(Xgrid[i-1]-Xgrid[j-1])-1)])/10)**2+(Size*max([0,(abs(Ygrid[i-1]-Ygrid[j-1])-1)])/10)**2 MaxRate[i-1,j-1] = Max_Sus_grid[j-1]*Kernel(Dist2) #Susceptible, Exposed, Infectious, Reported.==> latent period is 4 days i=1; S=len(np.where(Status==0)); E=len(np.where(np.logical_and(Status>0, Status<=5)));I=len(np.where(np.logical_and(Status>5, Status<=9))); R=len(np.where(Status==10)); R2=len(np.where(Status>9)); CullSheep=0; CullCattle=0; i=i+1; IterateFlag=1; # THIS IS A BAD APPROACH; GROWING LISTS IS EXTREMELY INEFFICIENT S=[] E=[] I=[] R=[] R2=[] CullSheep=[] CullCattle=[] t=[] t.append(0) results = np.c_[np.array([1]*N),np.arange(1,N+1),np.array([0]*N)] # NEVER SEPARATE STATEENTS WITH SEMICOLONS; VERY HARD TO READ AND DEBUG # JUST USE & INSTEAD OF logical_and while(np.logical_and(t[-1]0, Status<=5))[0]; Inf=np.where(Status>5)[0]; S.append(len(Sus)); E.append(len(Exp)); I.append(len(Inf)); t.append(t[i-2]+1);i+=1; #This is how I stop the simulation (all farms are infected) if t[-1]>5: if np.logical_or((E[-4]+I[-4]==0),I == N): # JUST USE A break STATEMENT INSTEAD OF A FLAG IterateFlag=0 # MOVE IMPORTS OUT OF FUNCTION from scipy.stats import itemfreq sim_num = np.array([i-1]*N) seq = np.arange(1,N+1) results_full = np.r_[results,np.c_[sim_num,seq,Status]] results = results_full #Return information regarding only farm of interest this = results_full[np.logical_or.reduce([results_full[:,1] == x for x in farms])] #Extract rows relating to timepoint of interest no_this = this[this[:,0]==end] #turn status to an indicator Status_ind = (no_this[:,2]>5).astype(int) # YOU SPEND A LOT OF ENERGY CONVERTING BACK AND FORTH BETWEEN NUMPY ARRAYS AND LISTS # JUST USE ARRAYS #Calculate distance to index farm - first infected is first in list of coords coords = list(zip(x,y)) index = np.array((coords[index_inf][0],coords[index_inf][1])) # THE BELOW WOULD BE MUCH CLEARER AS A LIST COMPREHENSION dist = [] for j in range(0,N): # CANT YOU JUST SUBINDEX THIS? coords[j,:2] b = np.array((coords[j][0],coords[j][1])) dist.append(np.linalg.norm(b-index)) to_return = np.c_[no_this[:,1],Status_ind,dist,Cows,Sheep,x,y] from scipy import spatial #Extract the infected farms inf_farms = to_return[to_return[:,1]==1] coords = list(zip(x,y)) #Create list of coordinates infected farms inf_farm_coords = list(zip(inf_farms[:,5],inf_farms[:,6])) # NOT CLEAR WHY YOU ARE DOING ALL THIS EXPENSIVE LIST CREATION list_of_inf_coords = [list(elem) for elem in inf_farm_coords] #Create list of coordinates of all farms list_of_coords = [list(elem) for elem in coords] #Calculate Euclidean distance from each farm to all infected farms- each row in matrix represents #distance of one farm to each infected farm dist_to_inf = spatial.distance_matrix(list_of_coords,list_of_inf_coords) #Find distance to closest infected farm # THERE IS A FUNCTION (np.minimum) THAT DOES ALL THIS FOR YOU def minval(array): #return(np.min(array[np.nonzero(array)])) return(np.min(array)) closest_infected = np.apply_along_axis(minval,1,dist_to_inf) average_infected = np.apply_along_axis(np.mean,1,dist_to_inf) to_return = np.c_[to_return,closest_infected,average_infected] #Create list of number of Cows and number of sheep for infected farms inf_farm_cows = list(inf_farms[:,3]) inf_farm_sheep = list(inf_farms[:,4]) #Create a function that extracts farm size based on closest infected farm def where_minval(array): #return(np.argmin(array[np.nonzero(array)])) return(np.argmin(array)) closest_infected_size_ind = np.apply_along_axis(where_minval,1,dist_to_inf) closest_infected_cows = [inf_farm_cows[i] for i in closest_infected_size_ind] closest_infected_sheep = [inf_farm_sheep[i] for i in closest_infected_size_ind] #Returns array: farmID, Status_ind, dist_to_index, num_Cows,num_Sheep,x,y,,disttoclosestinf,avgdisttoinf,#cowsinclosestiffarm,#sheepinclosestinffarm to_return = np.c_[to_return,closest_infected_cows,closest_infected_sheep] #Now run the outbreak for the additional end - end2 steps while(np.logical_and(t[-1]0, Status<=5))[0]; Inf=np.where(Status>5)[0]; S.append(len(Sus)); E.append(len(Exp)); I.append(len(Inf)); t.append(t[i-2]+1);i+=1; #Return only the data where a farm was not infected by the "end" day not_inf = to_return[to_return[:,1]==0] #append the status of these farms after the additional end2 - end days newstatus = [Status[i] for i in not_inf[:,0]-1] newstatus_ind = np.array([i>5 for i in newstatus]).astype(int) final = np.c_[not_inf,newstatus_ind] return(final) test = Outbreaks(Size=Size,N=N,Y0=1,farms = np.arange(1,N+1),end=10,end2=50,x=x,y=y,Cows=Cows,Sheep=Sheep) # In[11]: test[:,11] # HOW IS THE WORK BELOW DIFFERENT FROM WHAT YOU HAVE DONE ABOVE? YOU HAD ALREADY SIMULATED COWS AND SHEEP NEAR THE TOP. # In[12]: import numpy as np from random import randint Outbreak = Outbreaks(Size=Size,N=N,Y0=1,farms = np.arange(1,N+1),end=10,end2=50,x=x,y=y,Cows=Cows,Sheep=Sheep) Outbreak = np.c_[np.array([1]*Outbreak.shape[0]),Outbreak] Num_outbreaks = 1000 for i in range(Num_outbreaks): #Cows = np.array([randint(25,250) for p in range(N)]) Cows = np.array([randint(25,51) for p in range(15)]+[randint(51,76) for p in range(30)]+[randint(76,95) for p in range(20)]+[randint(95,250) for p in range(10)]) #Sheep = np.array([randint(25,250) for p in range(N)]) Sheep = np.array([randint(25,51) for p in range(15)]+[randint(51,76) for p in range(30)]+[randint(76,95) for p in range(20)]+[randint(95,250) for p in range(10)]) # ADD IS NOT A GOOD VARIABLE NAME add = Outbreaks(Size=Size,N=N,Y0=1,farms = np.arange(1,N+1),end=10,end2=50,x=x,y=y,Cows=Cows,Sheep=Sheep) add = np.c_[np.array([i+2]*add.shape[0]),add] new_Outbreak = np.r_[Outbreak,add] Outbreak = new_Outbreak print(i,np.sum(add[:,12]==1)) # In[13]: import pandas as pd df = pd.DataFrame(Outbreak) df.columns = ['run','farmID','Status','DistToIndex','NumCows','NumSheep','x-coord','y-coord','DistToNearestInfected','AvgDistToInfected','CowsNearestInfected','SheepNearestInfected','Status2'] long_random_everything_andind_anddist2 = df long_random_everything_andind_anddist2 # In[14]: np.sum(long_random_everything_andind_anddist2['Status2']==1) # In[15]: dist_diff = (long_random_everything_andind_anddist2['DistToNearestInfected'] - long_random_everything_andind_anddist2['DistToNearestInfected'].mean())/5 closest_cow_diff = (long_random_everything_andind_anddist2['CowsNearestInfected'] - long_random_everything_andind_anddist2['CowsNearestInfected'].mean())/20 closest_sheep_diff = (long_random_everything_andind_anddist2['SheepNearestInfected'] - long_random_everything_andind_anddist2['SheepNearestInfected'].mean())/20 status = long_random_everything_andind_anddist2['Status2'] from pymc import Normal, Binomial, Gamma, Lambda, invlogit, MCMC, Matplot, Bernoulli, MAP, AdaptiveMetropolis #N = df.shape[0] def pooled_model(): # Common slope & intercept prior intercept = Normal('intercept', mu=0., tau=0.001, value = 0) first_coef = Normal('first_coef', mu=0., tau=0.001, value = 0) size_coef = Normal ('size_coef', mu=0., tau= 0.001, value=[0]*2) #likelihood model prob = Lambda('prob', lambda intercept=intercept,first_coef=first_coef, size_coef= size_coef: invlogit(intercept + first_coef*dist_diff + size_coef[0]*closest_cow_diff + size_coef[1]*closest_sheep_diff)) y = Bernoulli('y', p=prob, value=status, observed=True) return locals() # In[16]: chains = 2 iterations = 10000 burn = 4000 M_pooled = MCMC(pooled_model()) M_map = MAP(pooled_model()) M_pooled = MCMC(M_map) #M_pooled.use_step_method(AdaptiveMetropolis, M_pooled.b) for i in range(chains): M_pooled.sample(iterations, burn) # In[17]: get_ipython().run_line_magic('matplotlib', 'inline') import matplotlib.pyplot as plt Matplot.summary_plot(M_pooled.intercept) # In[18]: get_ipython().run_line_magic('matplotlib', 'inline') import matplotlib.pyplot as plt Matplot.summary_plot(M_pooled.first_coef) # In[19]: get_ipython().run_line_magic('matplotlib', 'inline') import matplotlib.pyplot as plt Matplot.summary_plot(M_pooled.size_coef) # In[ ]: