;IDL version of mayfly (example 2), to work with mcmc code ;Ankur R Desai, David Moore, John Zobitz, and Michael Chadwick Oct-Dec 2010 ;Citation: ;To run from IDL ;.compile zobitz_example2_mayfly.pro ;mayfly_simulate ;helper function required to be previously compiled or in IDL path ;mcmc.pro ;First we need some functions to sample the data set and create binned averages FUNCTION mayfly_sampling,nmayflies,mayfly=mayfly ;develop a sample of mayflies given a frequency distribution ;%%% Creators: David Moore, JZ ;Adapted to IDL by Ankur Desai ;%%% Modified: 10/14/10, edited 12/22/10 (non-sequential month bug fixed). ;%%% Purpose: Generate a population distribution of nMayflies derived from a ;%%% distribution shown in Chadwick and Feminella 2001. The input variable ;%%% represents the desired maximum Population. ;mayfly is the original data variable ;fix for missing months (see new mayfly.m) IF n_elements(nmayflies) EQ 0 THEN nmayflies = 10000l ;%%% Digitzed data from Chadwick and Feminella 2001 - see file mayfly data.xlsx for explanation, etc. Each row is a length, ;%%% each column is a month ---> The number of columns MUST equal nMonths! ;%%% Data are ;%%% Oct-95 Nov-95 Dec-95 Jan-96 Mar-96 Apr-96 Jun-96 Aug-96 Sep-96 ;%%% Feb-96 (n=5), May-96 (n=8), Jul-96 (n=10). mayfly =[$ 0, 0, 0, 0, 0, 0, 0, 0, 0,$ 7.6087, 1.9802, 0, 0, 0, 0, 0, 0, 0,$ 23.9130, 10.8911, 0, 2.1505, 2.1505, 0, 1.0870, 4.3478, 13.5922,$ 17.3913, 24.7525, 11.8280, 23.6559, 22.5806, 4.5045, 3.2609, 9.7826, 30.0971,$ 19.5652, 36.6337, 31.1828, 34.4086, 32.2581, 31.5315, 6.5217, 14.1304, 33.9806,$ 20.6522, 11.8812, 34.4086, 23.6559, 24.7312, 25.2252, 22.8261, 21.7391, 8.7379,$ 10.8696, 9.9010, 11.8280, 11.8280, 16.1290, 31.5315, 30.4348, 36.9565, 13.5922,$ 0, 3.9604, 7.5269, 4.3011, 2.1505, 7.2072, 22.8261, 13.0435, 0,$ 0, 0, 3.2258, 0, 0, 0, 9.7826, 0, 0,$ 0, 0, 0, 0, 0, 0, 3.2609, 0, 0] ;fill in missing months from original data to make a 12-month data set mayfly = reform(mayfly,9,10) nans = transpose(replicate(!values.f_nan,10)) mayfly = [mayfly[0,*],mayfly[1,*],mayfly[2,*],mayfly[3,*],nans,mayfly[4,*],mayfly[5,*],nans,mayfly[6,*],nans,mayfly[7,*],mayfly[8,*]] nmonths = 12 ;Set up array for mayflies mayflypopulation = Make_array(nmayflies,nmonths,/float,value=!values.f_nan) rseed = total(mayfly,/nan) ;systime(/sec) ;switch to a fixed seed to keep initial condition ;Loop over all months FOR t = 0,nmonths-1 DO BEGIN IF finite(mayfly[t,0]) THEN BEGIN ; %%% We will compare the cumulative sum to a randomly drawn number ; %%% between 0 and 1. sampletime = total(mayfly[t,*],/cum)/100. mayflypopulation[*,t]=0 ;Loop over all size classes FOR i = 0,9 DO BEGIN mayflyprob = randomu(rseed,nmayflies) ;Probability that a mayfly is bigger than sizeclass mayflychance = mayflyprob GE sampletime[i] ;Code below randomly bumps a mayfly up or down a class bumpprob = randomu(rseed,nmayflies) bumpdecide = bumpprob GE 0.5 bumpprob = randomu(rseed,nmayflies) bumpdir = 1+((-2)*(bumpprob LT 0.5)) bumpvalue = bumpdecide*bumpdir ;Add to the population mayflypopulation[*,t]+=mayflychance+mayflychance*bumpvalue ENDFOR ;Remove negative or too big size classes mayflypopulation[*,t]>=0 mayflypopulation[*,t]<=10 ENDIF ENDFOR ;Return some mayflies return,mayflypopulation END ;Given a set of mayflies, compute a frequency distribution FUNCTION mayfly_meanlength,mayflies,nbins=nbins,deltaL=deltaL nmonth = n_elements(mayflies[0,*]) ;Number of size classes IF n_elements(nbins) EQ 0 THEN nbins = 10 ;Size of each class in mm IF n_elements(deltaL) EQ 0 THEN deltaL = 3 maxsize = nbins*deltaL ;The output variable is mean_flies mean_flies = fltarr(nmonth,nbins) FOR i = 0,nmonth-1 DO BEGIN ;Only count flyes that are valid size classes goodflies = where(finite(mayflies[*,i]),nflies) ;Histogram generates a distribution for us (built-in IDL function) IF nflies GT 0 THEN BEGIN mean_flies[i,*] = histogram(reform(mayflies[goodflies,i]),binsize=1,min=0,nbins=nbins) / float(nflies) ENDIF ELSE mean_flies[i,*] = !values.f_nan ENDFOR return,mean_flies END ;MAYFLY DYNAMICS ;First we need a program to compute the Growing Degree Days FUNCTION mayfly_gdd,t_lo,t_hi,temp ;Given a Low temp and high temp, count number of days with temp ;between them ;temp is a two-column array, time in months in first column, ;temperature in degrees C in the second month_l = [fix(min(temp[0,*])),fix(max(temp[0,*]))] months = findgen(month_l[1]-month_l[0]+1)+month_l[0] time = temp[0,*] t = temp[1,*] gdd = fltarr(n_elementS(months)) FOR i = 0,n_elements(months)-1 DO BEGIN index = where(time GE i AND time LT i+1) gr = where(t[index] GE t_lo AND t[index] LT t_hi,ngr) gdd[i] = ngr ENDFOR return,gdd END ;Here is where mayfly are made: FUNCTION mayfly_dynamics,temp,param,init=init,_extra=ex ;Given a temperature array (two-column: month and tempeature) ;and a parameter set ;And also (optional) the initial condition of mayflies ;Compute the forward state of mayflies for the following 11 months ;%%% Parameters (either estimated or set) ;%%% alpha: growth rate (mm / day) ;%%% delta: relative mortality rate ;%%% tempLow: baseline growth rate temperature (Celsius) ;%%% tempHi: upper limit to growth rate temperature (Celsius) ;If initial state is not provided or only number of flies is provided, ;we can produce it here IF n_elements(init) EQ 0 THEN init = 10000 IF n_elements(init) EQ 1 THEN curr_flies = (mayfly_sampling(init))[*,0] ELSE curr_flies = init ;get size of problem nflies = n_elements(curr_flies) nmonths = 12 nsizeclasses = 10 maxsize = 30 sizeincrement =maxsize/nsizeclasses ;extract parameters alpha = param[0] delta = param[1] t_lo = param[2] t_hi = param[3] ;Set up array and copy in initial state mayflyresults = make_array(nflies,nmonths,/float,value=!values.f_nan) mayflyresults[*,0] = curr_flies ;Compute growing degree days growingdays = mayfly_gdd(t_lo,t_hi,temp) ;fixed random seed for numerical stability in mortality function rseed = alpha*100+delta*100+t_lo+t_hi+total(curr_flies,/nan) ;Loop over all months except initial FOR i = 1,nmonths-1 DO BEGIN ;Focus on existing mayflies present = where(finite(mayflyresults[*,i-1]),npresent) IF npresent GT 0 THEN BEGIN ;Grow mayflies and convert into size class units lengthincrement = growingdays[i]*alpha binjumps = fix(lengthincrement/sizeincrement) newflies = (mayflyresults[present,i-1]+binjumps) ;Remove ones that are bigger than largest sizeclass bigflies = where(newflies GE nsizeclasses,nbig) IF nbig GT 0 THEN newflies[bigflies] = !values.f_nan ;Mortality (if delta is not 0) IF (npresent GT 1) AND (delta GT 0.0) THEN BEGIN ;Pick a random number between 0 and nflies*delta nrand = randomu(rseed)*nflies*delta IF long(nrand) GT 0 THEN BEGIN ;Randomly select flies from the population to remove removefly = randomu(rseed,nrand)*npresent newflies[removefly]=!values.f_nan ENDIF ENDIF ;Copy the final mayfly state into array mayflyresults[present,i]=newflies ENDIF ENDFOR ;Make binned mayfly density, which is what the likelihood function needs mean_flies = mayfly_meanlength(mayflyresults,nbins=nbins,deltaL=deltaL) missingbins = where(~finite(mean_flies),nmiss) IF nmiss GT 0 THEN mean_flies[missingbins] = 0.0 return,mean_flies END ;Instead of the default likelihood, we use this one FUNCTION mayfly_likelihood,x,y,param,model,valid,_EXTRA=ex,modout=modout ;only compare after first month modout = (call_FUNCTION(model,x,param,_EXTRA=ex)) modoutt = modout[1:*,*] yy = y[1:*,*] ;deal with missing data or bad model output (e.g., infinite or not a numbers) gv = where((valid EQ 1) AND finite(modoutt) AND finite(yy),nvalid) IF nvalid GT 1 THEN BEGIN sq = total( (modoutt[gv] - yy[gv])^2 ) sigma = sqrt(sq/nvalid) > 1e-32 ;The true log liklihood function (constants removed): ; loglike = (nvalid*alog(sigma)) + (sq/(2*(sigma^2))) ;If you do that math and propagate the constants in, you get loglike = nvalid * alog(4.1327314 * sigma) ;which saves a bit of computation time, which adds up since this ;function is called a lot! ENDIF ELSE BEGIN loglike = 1e31 ENDELSE return,loglike END ;MAIN PROGRAM pro mayfly_simulate ;init population N = 10000 ;Create a sample of N mayflies from the frequency observations sample = mayfly_sampling(N,mayfly=mayfly_obs) ;Create a "true" frequency dist from the mayflies and use this in the ;cost function truth = mayfly_meanlength(sample) init = sample[*,0] ;read temperature data mydocs = '/Users/adesai/Documents/research/data/' temp = (read_ascii(mydocs+'mcmc/temperature-ard.txt')).(0) ;interpolate temperature to 30 days per month temp_tm = findgen(30*12)/30. ;Oct-Sep temp_obs = transpose([[temp_tm],[interpol(temp[1,*],temp[0,*],temp_tm)]]) ;OPTIONAL SECTION ;Set some parameters just to explore nature of code alpha = 0.4 delta = 0.5 t_lo = 10 t_hi = 20 param = [alpha,delta,t_lo,t_hi] ;run program, get fly density ; nbins = 10 ; deltaL = 3 flies = mayfly_dynamics(temp_obs,param,init=init) ;END OPTINAL SECTION print,'Running MCMC' stop ;type .continue to keep going ;mcmc time: ;Set the prior ranges ;param = { name : ['alpha', 'delta', 't_lo', 't_hi'], $ ; value : [0.4, 0.2, 10.0, 25.0], $ ; max : [1.0, 1.0, 20.0, 40.0], $ ; min : [0.0, 0.0, -10.0, 10.0], $ ; changeable : [1 , 1, 1 , 1] } ;if you want to do fixed delta, uncomment this section param = { name : ['alpha', 'delta', 't_lo', 't_hi'], $ value : [0.4, 0.0, 10.0, 25.0], $ max : [1.0, 1.0, 20.0, 40.0], $ min : [0.0, 0.0, -10.0, 10.0], $ changeable : [1 , 0, 1 , 1] } ;call mcmc function mcmc,temp_obs,truth,param,/medium,model='mayfly_dynamics',likelihood='mayfly_likelihood',outputll=ll,outputvalue=p,outputy=y,init=init ;Write to file save,filename=mydocs+'mcmc/mayflyrun.sav',temp_obs,truth,param,ll,p,y,init ;1:1 plot of "truth" to "predicted" densities ;see code below for the plots used in the figures plot,y,truth,psym=1 stop ;pseudo-data retrieval ; mcmc,temp,flies,param,/medium,model='mayfly_dynamics',likelihood='mayfly_likelihood',outputll=ll,outputvalue=p,outputy=y2,init=init ; plot,y2,flies,psym=1 ; stop END ;Code to make figures ;Draw a box PRO mayfly_box,ux,uy,lx,ly,_extra=_extra,outline=outline,nofill=nofill,outcolor=outcolor,data=data,device=device,normal=normal,outthick=outthick ;Draw a box from ux,uy to lx,ly xs = [ux,ux,lx,lx,ux] ys = [uy,ly,ly,uy,uy] IF NOT keyword_set(nofill) THEN polyfill,xs,ys,_extra=_extra,data=data,device=device,normal=normal IF keyword_set(outline) THEN BEGIN plots,xs,ys,color=outcolor,data=data,device=device,normal=normal,thick=outthick ENDIF END ;Draw a histogram plot given a x,y array PRO mayfly_plot,data,outline=outline,nofill=nofill,color=color ;bar plot histogram, assumes plot already up ;uses data values go from 0-1. Each column is a new month, each row is ;a different size class. ncol = n_elements(data[*,0]) nrow = n_elements(data[0,*]) FOR x = 0,ncol-1 DO BEGIN FOR y = 0,nrow-1 DO BEGIN dval = data[x,y] IF finite(dval) AND (dval GT 0) THEN BEGIN xcoords = [((x+1)-(dval/2.0)),((x+1)+(dval/2.0))] ycoords = [y+1.5,y+0.5] mayfly_box,xcoords[0],ycoords[0],xcoords[1],ycoords[1],outline=outline,nofill=nofill,color=color ENDIF ENDFOR ENDFOR END PRO mayfly_figures ;restore the data from mayfly_simulate mydocs = '/Users/adesai/Documents/research/data/' restore,mydocs+'mcmc/mayflyrun_delta.sav' param_d = param ll_d = ll y_d = y p_d = p restore,mydocs+'mcmc/mayflyrun_nodelta.sav' param_nd = param ll_nd = ll y_nd = y p_nd = p ;bar plot for ;delta ;no delta !p.multi = [0,1,3] plot,[0,0],[0,0],/nodata,xrange=[0,13],yrange=[0,11],xticks=13,yticks=11,xtickn=[' ','O','N','D','J','F','M','A','M','J','J','A','S',' '],ytickn=[' ','0-3','4-6','7-9','10-12','13-15','16-18','19-21','22-24','25-27','28-30',' '],ytitle='Total length (mm)' mayfly_plot,y_d*3,color=180 mayfly_plot,truth*3,/nofill,/outline plot,[0,0],[0,0],/nodata,xrange=[0,13],yrange=[0,11],xticks=13,yticks=11,xtickn=[' ','O','N','D','J','F','M','A','M','J','J','A','S',' '],ytickn=[' ','0-3','4-6','7-9','10-12','13-15','16-18','19-21','22-24','25-27','28-30',' '],ytitle='Total length (mm)' mayfly_plot,y_nd*3,color=180 mayfly_plot,truth*3,/nofill,/outline stop ;scatterplot of delta vs likelihood ;vary delta by +/- 50% of best value thep = p_d[*,0] drange = (0.5+(findgen(100)/100.))*thep[1] valid = y_d valid[*] = 1 mayflyll = fltarr(n_elements(drange)) FOR i = 0,n_elements(drange)-1 DO mayflyll[i] = mayfly_likelihood(temp_obs,truth,[thep[0],drange[i],thep[2],thep[3]],'mayfly_dynamics',valid,init=init) !p.multi = [0,2,2] plot,drange,mayflyll,xtitle='Delta',ytitle='Log Likelihod',psym=-1 oplot,[thep[1],thep[1]],[-10000,10000],thick=2 stop ;forward run with a 1 degree temperature rise temp_p = temp_obs temp_p[1,*]+=2 mayflyt = mayfly_dynamics(temp_p,p_d[*,0],init=init) !p.multi = [0,1,3] plot,[0,0],[0,0],/nodata,xrange=[0,13],yrange=[0,11],xticks=13,yticks=11,xtickn=[' ','O','N','D','J','F','M','A','M','J','J','A','S',' '],ytickn=[' ','0-3','4-6','7-9','10-12','13-15','16-18','19-21','22-24','25-27','28-30',' '],ytitle='Total length (mm)' mayfly_plot,mayflyt*3,color=180 mayfly_plot,y_d*3,/nofill,/outline temp_p[1,*]+=2 mayflyt = mayfly_dynamics(temp_p,p_d[*,0],init=init) plot,[0,0],[0,0],/nodata,xrange=[0,13],yrange=[0,11],xticks=13,yticks=11,xtickn=[' ','O','N','D','J','F','M','A','M','J','J','A','S',' '],ytickn=[' ','0-3','4-6','7-9','10-12','13-15','16-18','19-21','22-24','25-27','28-30',' '],ytitle='Total length (mm)' mayfly_plot,mayflyt*3,color=180 mayfly_plot,y_d*3,/nofill,/outline stop ;tables ;prior, posterior for d and nd ;delta - r2, LL, BIC, parameters ;no delta, r2, LL, BIC, parameters pd_max = max(p_d,dimension=2) pd_min = min(p_d,dimension=2) pnd_max = max(p_nd,dimension=2) pnd_min = min(p_nd,dimension=2) print,'Parameter Prior Posterior_Mort Posterior_NoMort' print,'alpha (0-1) ',p_d[0,0],' ',pd_min[0],' ',pd_max[0],' ',p_nd[0,0],' ',pnd_min[0],' ',pnd_max[0] print,'delta (0-1) ',p_d[1,0],' ',pd_min[1],' ',pd_max[1],' ',p_nd[1,0],' ',pnd_min[1],' ',pnd_max[1] print,'T_Low (-10-20) ',p_d[2,0],' ',pd_min[2],' ',pd_max[2],' ',p_nd[2,0],' ',pnd_min[2],' ',pnd_max[2] print,'T_High (10-40) ',p_d[3,0],' ',pd_min[3],' ',pd_max[3],' ',p_nd[3,0],' ',pnd_min[3],' ',pnd_max[3] print t_truth = truth[1:*,*] t_yd = y_d[1:*,*] t_ynd = y_nd[1:*,*] g = where(finite(t_truth),ng) t_truth = t_truth[g] t_yd = t_yd[g] t_ynd = t_ynd[g] print,'Stat Mortality No_Mortality' print,'r2 ',correlate(t_yd,t_truth)^2,' ',correlate(t_ynd,t_truth)^2 print,'Log Likelihood ',ll_d[0],' ',ll_nd[0] print,'BIC ',-2.0*ll_d[0]+4.0*alog(ng),-2.0*ll_nd[0]+3.0*alog(ng) stop END