###
### !!!Only works in UNIX environments!!! See update 2012-06-20 (below).
###
"gbm.step.multic" <-
function (
  data,                                     # the input dataframe
  gbm.x,                                    # the predictors
  gbm.y,                                    # and response
  offset = NULL,                            # allows an offset to be specified
  fold.vector = NULL,                       # allows a fold vector to be read in for CV with offsets,
  tree.complexity = 1,                      # sets the complexity of individual trees
  learning.rate = 0.01,                     # sets the weight applied to inidivudal trees
  bag.fraction = 0.75,                      # sets the proportion of observations used in selecting variables
  site.weights = rep(1, nrow(data)),        # allows varying weighting for sites
  var.monotone = rep(0, length(gbm.x)),     # restricts responses to individual predictors to monotone 
  n.folds = 10,                             # number of folds
  prev.stratify = TRUE,                     # prevalence stratify the folds - only for p/a data
  family = "bernoulli",                     # family - bernoulli (=binomial), poisson, laplace or gaussian
  n.trees = 50,                             # number of initial trees to fit
  step.size = n.trees,                      # numbers of trees to add at each cycle
  max.trees = 10000,                        # max number of trees to fit before stopping
  tolerance.method = "auto",                # method to use in deciding to stop - "fixed" or "auto"
  tolerance = 0.01,                        # tolerance value to use - if method == fixed is absolute, 
                                            # if auto is multiplier * total mean deviance
  keep.data = FALSE,                        # keep raw data in final model
  plot.main = TRUE,                         # plot hold-out deviance curve
  plot.folds = FALSE,                       # plot the individual folds as well
  verbose = TRUE,                           # control amount of screen reporting
  silent = FALSE,                           # to allow running with no output for simplifying model)
  keep.fold.models = FALSE,                 # keep the fold models from cross valiation
  keep.fold.vector = FALSE,                 # allows the vector defining fold membership to be kept
  keep.fold.fit = FALSE,                    # allows the predicted values for observations from CV to be kept
  ...)                                      # allows for any additional plotting parameters
{
#
# j. leathwick/j. elith - 19th September 2005
#
# version 2.9
#
# function to assess optimal no of boosting trees using k-fold cross validation 
#
# implements the cross-validation procedure described on page 215 of 
# Hastie T, Tibshirani R, Friedman JH (2001) The Elements of Statistical Learning: 
# Data Mining, Inference, and Prediction Springer-Verlag, New York.
#
# divides the data into 10 subsets, with stratification by prevalence if required for pa data
# then fits a gbm model of increasing complexity along the sequence from n.trees to n.trees + (n.steps * step.size)
# calculating the residual deviance at each step along the way
# after each fold processed, calculates the average holdout residual deviance and its standard error
# then identifies the optimal number of trees as that at which the holdout deviance is minimised 
# and fits a model with this number of trees, returning it as a gbm model along with additional information 
# from the cv selection process
#
# updated 13/6/05 to accommodate weighting of sites
#
# updated 19/8/05 to increment all folds simultaneously, allowing the stopping rule
# for the maxinum number of trees to be fitted to be imposed by the data, 
# rather than being fixed in advance
#
# updated 29/8/05 to return cv test statistics, and deviance as mean
# time for analysis also returned via unclass(Sys.time())
#
# updated 5/9/05 to use external function calc.deviance 
# and to return cv test stats via predictions formed from fold models 
# with n.trees = target.trees
#
# updated 15/5/06 to calculate variance of fitted and predicted values across folds
# these can be expected to approximate the variance of fitted values
# as would be estimated for example by bootstrapping
# as these will underestimate the true variance 
# they are corrected by multiplying by (n-1)2/n 
# where n is the number of folds
#
# updated 25/3/07 tp allow varying of bag fraction
#
# requires gbm library from Cran
# requires roc and calibration scripts of J Elith
# requires calc.deviance script of J Elith/J Leathwick
#
# Updated 2012-06-20 (Stefan Metzger): support multicore processing (only use in UNIX environments!)
# To support multicore processing a serial backend must be registered
# 	library(doMC)
#	registerDoMC()
# However doMC depends on the packages >foreach<, >iterators< and >multicore<,
# of which the >multicore< package is thus far fully functional only 
# under UNIX environments. Using this function without a registered serial backend 
# (e.g., in Windows) will produce an error "$ operator is invalid for atomic vectors" 
# and stop the routine. Error supposedly due to different handling of 
# local / global variables with / without registered serial backend.

# libraries  
  require(gbm)
  require(foreach)
  require(doMC)
	registerDoMC()
  
  if (silent) verbose <- FALSE

# initiate timing call

  z1 <- unclass(Sys.time())

# setup input data and assign to position one

  dataframe.name <- deparse(substitute(data))   # get the dataframe name

  data <- eval(data)
  x.data <- eval(data[, gbm.x])                 #form the temporary datasets
  names(x.data) <- names(data)[gbm.x]
  y.data <- eval(data[, gbm.y])
  sp.name <- names(data)[gbm.y]
  if (family == "bernoulli") prevalence <- mean(y.data)

  assign("x.data", x.data, env = globalenv())               #and assign them for later use
  assign("y.data", y.data, env = globalenv())

  offset.name <- deparse(substitute(offset))   # get the dataframe name
  offset = eval(offset)
  print(summary(offset))

  n.cases <- nrow(data)
  n.preds <- length(gbm.x)

  if (!silent) {
    cat("\n","\n","GBM STEP - version 2.9","\n","\n")
    cat("Performing cross-validation optimisation of a boosted regression tree model \n")
    cat("for",sp.name,"with dataframe",dataframe.name,"and using a family of",family,"\n\n")
    cat("Using",n.cases,"observations and",n.preds,"predictors \n\n")
  }

# set up the selector variable either with or without prevalence stratification

  if (is.null(fold.vector)) {

    if (prev.stratify & family == "bernoulli") {
      presence.mask <- data[,gbm.y] == 1
      absence.mask <- data[,gbm.y] == 0
      n.pres <- sum(presence.mask)
      n.abs <- sum(absence.mask)

# create a vector of randomised numbers and feed into presences
      selector <- rep(0,n.cases)
      temp <- rep(seq(1, n.folds, by = 1), length = n.pres)
      temp <- temp[order(runif(n.pres, 1, 100))]
      selector[presence.mask] <- temp

# and then do the same for absences
      temp <- rep(seq(1, n.folds, by = 1), length = n.abs)
      temp <- temp[order(runif(n.abs, 1, 100))]
      selector[absence.mask] <- temp
      
    } else {  #otherwise make them random with respect to presence/absence
      selector <- rep(seq(1, n.folds, by = 1), length = n.cases)
      selector <- selector[order(runif(n.cases, 1, 100))]
    }
  } else {
    if (length(fold.vector) != n.cases) stop("supplied fold vector is of wrong length")
    cat("loading user-supplied fold vector \n\n")
    selector <- eval(fold.vector)
  }

# set up the storage space for results

  pred.values <- rep(0, n.cases)

  cv.loss.matrix <- matrix(0, nrow = n.folds, ncol = 1)
  training.loss.matrix <- matrix(0, nrow = n.folds, ncol = 1)
  trees.fitted <- n.trees

  model.list <- list(paste("model",c(1:n.folds),sep=""))     # dummy list for the tree models

# set up the initial call to gbm

  if (is.null(offset)) {
    gbm.call <- paste("gbm(y.subset ~ .,data=x.subset, n.trees = n.trees,
    interaction.depth = tree.complexity, shrinkage = learning.rate, 
    bag.fraction = bag.fraction, weights = weight.subset, 
    distribution = as.character(family), var.monotone = var.monotone, 
    verbose = FALSE)", sep="")
  } else {
    gbm.call <- paste("gbm(y.subset ~ . + offset(offset.subset),
    data=x.subset, n.trees = n.trees,
    interaction.depth = tree.complexity, shrinkage = learning.rate, 
    bag.fraction = bag.fraction, weights = weight.subset, 
    distribution = as.character(family), var.monotone = var.monotone, 
    verbose = FALSE)", sep="")
  } 

  n.fitted <- n.trees

# calculate the total deviance
               
  y_i <- y.data

  u_i <- sum(y.data * site.weights) / sum(site.weights)
  u_i <- rep(u_i,length(y_i))

  total.deviance <- calc.deviance(y_i, u_i, weights = site.weights, family = family, calc.mean = FALSE)

  mean.total.deviance <- total.deviance/n.cases

  tolerance.test <- tolerance

  if (tolerance.method == "auto") {
     tolerance.test <- mean.total.deviance * tolerance
  }

# now step through the folds setting up the initial call

  if (!silent){ 

    cat("creating",n.folds,"initial models of",n.trees,"trees","\n")
  
    if (prev.stratify & family == "bernoulli") cat("\n","folds are stratified by prevalence","\n","\n")
      else cat("\n","folds are unstratified","\n","\n")
  
    cat ("total mean deviance = ",round(mean.total.deviance,4),"\n","\n")

    cat("tolerance is fixed at ",round(tolerance.test,4),"\n","\n")

    if (tolerance.method != "fixed" & tolerance.method != "auto") {
      cat("invalid argument for tolerance method - should be auto or fixed","\n")
      return()}
  }

  if (verbose) cat("ntrees resid. dev.","\n")


###
  #beginning of first loop (line 219 - 266)
#  for (i in 1:n.folds) {
  FEout <- foreach(i = 1:n.folds) %dopar% {#, .combine = "list"
  #i <- 1
###
    

    model.mask <- selector != i  #used to fit model on majority of data
    pred.mask <- selector == i   #used to identify the with-held subset

    y.subset <- y.data[model.mask]
    x.subset <- x.data[model.mask,]
    weight.subset <- site.weights[model.mask]

    if (!is.null(offset)) {
      offset.subset <- offset[model.mask] 
    } else {
      offset.subset <- NULL
    }

    model.list <- eval(parse(text = gbm.call))

    fitted.values <- model.list$fit  #predict.gbm(model.list, x.subset, type = "response", n.trees = n.trees)
    if (!is.null(offset)) fitted.values <- fitted.values + offset[model.mask]
    if (family == "bernoulli") fitted.values <- exp(fitted.values)/(1 + exp(fitted.values))
    if (family == "poisson") fitted.values <- exp(fitted.values)

    pred.values[pred.mask] <- predict.gbm(model.list, x.data[pred.mask, ], n.trees = n.trees)
    if (!is.null(offset)) pred.values[pred.mask] <- pred.values[pred.mask] + offset[pred.mask]
    if (family == "bernoulli") pred.values[pred.mask] <- exp(pred.values[pred.mask])/(1 + exp(pred.values[pred.mask]))
    if (family == "poisson") pred.values[pred.mask] <- exp(pred.values[pred.mask])

# calc training deviance

    y_i <- y.subset
    u_i <- fitted.values
    weight.fitted <- site.weights[model.mask]
    training.loss.matrix[i,1] <- calc.deviance(y_i, u_i, weight.fitted, family = family)

# calc holdout deviance

    y_i <- y.data[pred.mask]
    u_i <- pred.values[pred.mask]
    weight.preds <- site.weights[pred.mask]
    cv.loss <- calc.deviance(y_i, u_i, weight.preds, family = family)
    
#return result
    list(
      model.list,
      cv.loss
    )
    
###
  } # end of first loop
  model.list <- lapply(1:length(FEout), function(i) FEout[[i]][[1]])  
  cv.loss.matrix[,1] <- sapply(1:length(FEout), function(i) FEout[[i]][[2]])
  rm(FEout)  
###

  
  delta.deviance <- 1

  cv.loss.values <- apply(cv.loss.matrix,2,mean)
  if (verbose) cat(n.fitted,"  ",round(cv.loss.values,4),"\n","\n")

  if (!silent) cat("")
  if (!silent) cat("now adding trees...","\n")

  j <- 1

  
###
  #start of while loop
  while (delta.deviance > tolerance.test & n.fitted < max.trees) {  
###

    
# add a new column to the results matrice..

    training.loss.matrix <- cbind(training.loss.matrix,rep(0,n.folds))
    cv.loss.matrix <- cbind(cv.loss.matrix,rep(0,n.folds))

    n.fitted <- n.fitted + step.size
    trees.fitted <- c(trees.fitted,n.fitted)
  
    j <- j + 1
    
    
  ###
    # beginning of inner loop
#    for (i in 1:n.folds) {
    FEout <- foreach(i = 1:n.folds) %dopar% { #, .combine = "rbind"
    #i <- 1
  ###
      

      model.mask <- selector != i  #used to fit model on majority of data
      pred.mask <- selector == i   #used to identify the with-held subset

      y.subset <- y.data[model.mask]
      x.subset <- x.data[model.mask,]
      weight.subset <- site.weights[model.mask]
      if (!is.null(offset)) {
        offset.subset <- offset[model.mask] 
      }

      model.list <- gbm.more(model.list[[i]], weights = weight.subset, step.size)

      fitted.values <- model.list$fit # predict.gbm(model.list,x.subset, type = "response", n.trees = n.fitted) 
      if (!is.null(offset)) fitted.values <- fitted.values + offset[model.mask]
      if (family == "bernoulli") fitted.values <- exp(fitted.values)/(1 + exp(fitted.values))
      if (family == "poisson") fitted.values <- exp(fitted.values)

      pred.values[pred.mask] <- predict.gbm(model.list, x.data[pred.mask, ], n.trees = n.fitted)
      if (!is.null(offset)) pred.values[pred.mask] <- pred.values[pred.mask] + offset[pred.mask]
      if (family == "bernoulli") pred.values[pred.mask] <- exp(pred.values[pred.mask])/(1 + exp(pred.values[pred.mask]))
      if (family == "poisson") pred.values[pred.mask] <- exp(pred.values[pred.mask])
   
# calculate training deviance

      y_i <- y.subset
      u_i <- fitted.values
      weight.fitted <- site.weights[model.mask]
      training.loss.matrix[i,j] <- calc.deviance(y_i, u_i, weight.fitted, family = family)

# calc holdout deviance

      u_i <- pred.values[pred.mask]
      y_i <- y.data[pred.mask]
      weight.preds <- site.weights[pred.mask]
      #cv.loss.matrix[i,j] <- 
      cv.loss <- calc.deviance(y_i, u_i, weight.preds, family = family)
      
      #return result
        list(
          model.list,
          cv.loss
        )

      
  ###
    }
    # end of inner loop
    model.list <- lapply(1:length(FEout), function(i) FEout[[i]][[1]])  
    cv.loss.matrix[,j] <- sapply(1:length(FEout), function(i) FEout[[i]][[2]])
    rm(FEout)
  ###
    
    
    cv.loss.values <- apply(cv.loss.matrix,2,mean)

    if (j < 5) {
      if (cv.loss.values[j] > cv.loss.values[j-1]) {
        if (!silent) cat("restart model with a smaller learning rate or smaller step size...")
        return()
      }
    }

      if (j >= 20) {   #calculate stopping rule value
        test1 <- mean(cv.loss.values[(j-9):j])
        test2 <- mean(cv.loss.values[(j-19):(j-9)])
        delta.deviance <- test2 - test1
      }

      if (verbose) cat(n.fitted," ",round(cv.loss.values[j],4),"\n")    

    
###
  # end of while loop
  }
###
  
  
# now begin process of calculating optimal number of trees

  training.loss.values <- apply(training.loss.matrix,2,mean)

  cv.loss.ses <- rep(0,length(cv.loss.values))
  cv.loss.ses <- sqrt(apply(cv.loss.matrix,2,var)) / sqrt(n.folds)

# find the target holdout deviance

  y.bar <- min(cv.loss.values) 

# plot out the resulting curve of holdout deviance 

  if (plot.main) {

    y.min <- min(cv.loss.values - cv.loss.ses)  #je added multiplier 10/8/05
    y.max <- max(cv.loss.values + cv.loss.ses)  #je added multiplier 10/8/05

    if (plot.folds) {
      y.min <- min(cv.loss.matrix)
      y.max <- max(cv.loss.matrix) }

      plot(trees.fitted, cv.loss.values, type = 'l', ylab = "holdout deviance",
          xlab = "no. of trees", ylim = c(y.min,y.max)) #, ...
      abline(h = y.bar, col = 2)

      lines(trees.fitted, cv.loss.values + cv.loss.ses, lty=2)  
      lines(trees.fitted, cv.loss.values - cv.loss.ses, lty=2)  

      if (plot.folds) {
        for (i in 1:n.folds) {
          lines(trees.fitted, cv.loss.matrix[i,],lty = 3)
      }
    }
  }
    
# identify the optimal number of trees 

  target.trees <- trees.fitted[match(TRUE,cv.loss.values == y.bar)]

  if(plot.main) {
    abline(v = target.trees, col=3)
    title(paste(sp.name,", d - ",tree.complexity,", lr - ",learning.rate, sep=""))
  }

# estimate the cv deviance and test statistics
# includes estimates of the standard error of the fitted values added 2nd may 2005

  cv.deviance.stats <- rep(0, n.folds)
  cv.roc.stats <- rep(0, n.folds)
  cv.cor.stats <- rep(0, n.folds)
  cv.calibration.stats <- matrix(0, ncol=5, nrow = n.folds)
  if (family == "bernoulli") threshold.stats <- rep(0, n.folds)

  fitted.matrix <- matrix(NA, nrow = n.cases, ncol = n.folds)  # used to calculate se's
  fold.fit <- rep(0,n.cases)

  for (i in 1:n.folds) {

    pred.mask <- selector == i   #used to identify the with-held subset
    model.mask <- selector != i  #used to fit model on majority of data

    fits <- predict.gbm(model.list[[i]], x.data[model.mask, ], n.trees = target.trees)
    if (!is.null(offset)) fits <- fits + offset[model.mask]
    if (family == "bernoulli") fits <- exp(fits)/(1 + exp(fits))
    if (family == "poisson") fits <- exp(fits)
    fitted.matrix[model.mask,i] <- fits

    fits <- predict.gbm(model.list[[i]], x.data[pred.mask, ], n.trees = target.trees)
    if (!is.null(offset)) fits <- fits + offset[pred.mask]
    fold.fit[pred.mask] <- fits  # store the linear predictor values
    if (family == "bernoulli") fits <- exp(fits)/(1 + exp(fits))
    if (family == "poisson") fits <- exp(fits)
    fitted.matrix[pred.mask,i] <- fits

    y_i <- y.data[pred.mask] 
    u_i <- fitted.matrix[pred.mask,i]  #pred.values[pred.mask]
    weight.preds <- site.weights[pred.mask]

    cv.deviance.stats[i] <- calc.deviance(y_i, u_i, weight.preds, family = family)

    cv.cor.stats[i] <- cor(y_i,u_i)

    if (family == "bernoulli") {
      cv.roc.stats[i] <- roc(y_i,u_i)
      cv.calibration.stats[i,] <- calibration(y_i,u_i,"binomial")
      threshold.stats[i] <- approx(ppoints(u_i), sort(u_i,decreasing = T), prevalence)$y
    }

    if (family == "poisson") {
      cv.calibration.stats[i,] <- calibration(y_i,u_i,"poisson")
    }
  }

  fitted.vars <- apply(fitted.matrix,1, var, na.rm = TRUE)

# now calculate the mean and se's for the folds

  cv.dev <- mean(cv.deviance.stats, na.rm = TRUE)
  cv.dev.se <- sqrt(var(cv.deviance.stats)) / sqrt(n.folds)

  cv.cor <- mean(cv.cor.stats, na.rm = TRUE)
  cv.cor.se <- sqrt(var(cv.cor.stats, use = "complete.obs")) / sqrt(n.folds)

  cv.roc <- 0.0
  cv.roc.se <- 0.0 

  if (family == "bernoulli") {
    cv.roc <- mean(cv.roc.stats,na.rm=TRUE)
    cv.roc.se <- sqrt(var(cv.roc.stats, use = "complete.obs")) / sqrt(n.folds)  
    cv.threshold <- mean(threshold.stats, na.rm = T)
    cv.threshold.se <- sqrt(var(threshold.stats, use = "complete.obs")) / sqrt(n.folds)  
  }
 
  cv.calibration <- 0.0
  cv.calibration.se <- 0.0

  if (family == "poisson" | family == "bernoulli") {
    cv.calibration <- apply(cv.calibration.stats,2,mean)
    cv.calibration.se <- apply(cv.calibration.stats,2,var)
    cv.calibration.se <- sqrt(cv.calibration.se) / sqrt(n.folds) }

# fit the final model

  if (is.null(offset)) {
    gbm.call <- paste("gbm(y.data ~ .,data=x.data, n.trees = target.trees,
    interaction.depth = tree.complexity, shrinkage = learning.rate, 
    bag.fraction = bag.fraction, weights = site.weights, 
    distribution = as.character(family), var.monotone = var.monotone, 
    verbose = FALSE)", sep="")
    }
  else {
    gbm.call <- paste("gbm(y.data ~ . + offset(offset),data=x.data, n.trees = target.trees,
    interaction.depth = tree.complexity, shrinkage = learning.rate, 
    bag.fraction = bag.fraction, weights = site.weights, 
    distribution = as.character(family), var.monotone = var.monotone, 
    verbose = FALSE)", sep="")
    } 

  if (!silent) cat("fitting final gbm model with a fixed number of ",target.trees," trees for ",sp.name,"\n") 

  gbm.object <- eval(parse(text = gbm.call))

  best.trees <- target.trees

#extract fitted values and summary table
  
  gbm.summary <- summary(gbm.object,n.trees = target.trees, plotit = FALSE)

  fits <- predict.gbm(gbm.object,x.data,n.trees = target.trees)
  if (!is.null(offset)) fits <- fits + offset
  if (family == "bernoulli") fits <- exp(fits)/(1 + exp(fits))
  if (family == "poisson") fits <- exp(fits)
  fitted.values <- fits

  y_i <- y.data
  u_i <- fitted.values
  resid.deviance <- calc.deviance(y_i, u_i, weights = site.weights, family = family, calc.mean = FALSE)

  self.cor <- cor(y_i,u_i)
  self.calibration <- 0.0
  self.roc <- 0.0

  if (family == "bernoulli") {  # do this manually as we need the residuals
    deviance.contribs <- (y_i * log(u_i)) + ((1-y_i) * log(1 - u_i))
    residuals <- sqrt(abs(deviance.contribs * 2))
    residuals <- ifelse((y_i - u_i) < 0, 0 - residuals, residuals)
    self.roc <- roc(y_i,u_i)
    self.calibration <- calibration(y_i,u_i,"binomial")
  }

  if (family == "poisson") {   # do this manually as we need the residuals
    deviance.contribs <- ifelse(y_i == 0, 0, (y_i * log(y_i/u_i))) - (y_i - u_i)
    residuals <- sqrt(abs(deviance.contribs * 2))
    residuals <- ifelse((y_i - u_i) < 0, 0 - residuals, residuals)
    self.calibration <- calibration(y_i,u_i,"poisson")
  }

  if (family == "gaussian" | family == "laplace") {
    residuals <- y_i - u_i
  }

  mean.resid.deviance <- resid.deviance/n.cases

  z2 <- unclass(Sys.time())
  elapsed.time.minutes <- round((z2 - z1)/ 60,2)  #calculate the total elapsed time

  if (verbose) {
    cat("\n")
    cat("mean total deviance =", round(mean.total.deviance,3),"\n")
    cat("mean residual deviance =", round(mean.resid.deviance,3),"\n","\n")
    cat("estimated cv deviance =", round(cv.dev,3),"; se =",
      round(cv.dev.se,3),"\n","\n")
    cat("training data correlation =",round(self.cor,3),"\n")
    cat("cv correlation = ",round(cv.cor,3),"; se =",round(cv.cor.se,3),"\n","\n")
    if (family == "bernoulli") {
      cat("training data ROC score =",round(self.roc,3),"\n")
      cat("cv ROC score =",round(cv.roc,3),"; se =",round(cv.roc.se,3),"\n","\n")
    }
    cat("elapsed time - ",round(elapsed.time.minutes,2),"minutes","\n")
  }

  if (n.fitted == max.trees & !silent) {
    cat("\n","########### warning ##########","\n","\n")
    cat("maximum tree limit reached - results may not be optimal","\n")
    cat("  - refit with faster learning rate or increase maximum number of trees","\n") 
  }
    
# now assemble data to be returned

  gbm.detail <- list(dataframe = dataframe.name, gbm.x = gbm.x, predictor.names = names(x.data), 
    gbm.y = gbm.y, response.name = sp.name, offset = offset.name, family = family, tree.complexity = tree.complexity, 
    learning.rate = learning.rate, bag.fraction = bag.fraction, cv.folds = n.folds, 
    prev.stratification = prev.stratify, max.fitted = n.fitted, n.trees = target.trees, 
    best.trees = target.trees, train.fraction = 1.0, tolerance.method = tolerance.method, 
    tolerance = tolerance, var.monotone = var.monotone, date = date(), 
    elapsed.time.minutes = elapsed.time.minutes)

  training.stats <- list(null = total.deviance, mean.null = mean.total.deviance, 
    resid = resid.deviance, mean.resid = mean.resid.deviance, correlation = self.cor, 
    discrimination = self.roc, calibration = self.calibration)

  cv.stats <- list(deviance.mean = cv.dev, deviance.se = cv.dev.se, 
    correlation.mean = cv.cor, correlation.se = cv.cor.se,
    discrimination.mean = cv.roc, discrimination.se = cv.roc.se,
    calibration.mean = cv.calibration, calibration.se = cv.calibration.se)

  if (family == "bernoulli") {
    cv.stats$cv.threshold <- cv.threshold
    cv.stats$cv.threshold.se <- cv.threshold.se
  }

  rm(x.data,y.data, envir = globalenv())           #finally, clean up the temporary dataframes

# and assemble results for return

  gbm.object$gbm.call <- gbm.detail
  gbm.object$fitted <- fitted.values
  gbm.object$fitted.vars <- fitted.vars
  gbm.object$residuals <- residuals
  gbm.object$contributions <- gbm.summary
  gbm.object$self.statistics <- training.stats
  gbm.object$cv.statistics <- cv.stats
  gbm.object$weights <- site.weights
  gbm.object$trees.fitted <- trees.fitted
  gbm.object$training.loss.values <- training.loss.values
  gbm.object$cv.values <- cv.loss.values
  gbm.object$cv.loss.ses <- cv.loss.ses
  gbm.object$cv.loss.matrix <- cv.loss.matrix
  gbm.object$cv.roc.matrix <- cv.roc.stats

  if (keep.fold.models) gbm.object$fold.models <- model.list
  else gbm.object$fold.models <- NULL

  if (keep.fold.vector) gbm.object$fold.vector <- selector
  else gbm.object$fold.vector <- NULL

  if (keep.fold.fit) gbm.object$fold.fit <- fold.fit
  else gbm.object$fold.fit <- NULL

  return(gbm.object)  
}


"gbm.step" <-
function (data,                             # the input dataframe
  gbm.x,                                    # the predictors
  gbm.y,                                    # and response
  offset = NULL,                            # allows an offset to be specified
  fold.vector = NULL,                       # allows a fold vector to be read in for CV with offsets,
  tree.complexity = 1,                      # sets the complexity of individual trees
  learning.rate = 0.01,                     # sets the weight applied to inidivudal trees
  bag.fraction = 0.75,                      # sets the proportion of observations used in selecting variables
  site.weights = rep(1, nrow(data)),        # allows varying weighting for sites
  var.monotone = rep(0, length(gbm.x)),     # restricts responses to individual predictors to monotone 
  n.folds = 10,                             # number of folds
  prev.stratify = TRUE,                     # prevalence stratify the folds - only for p/a data
  family = "bernoulli",                     # family - bernoulli (=binomial), poisson, laplace or gaussian
  n.trees = 50,                             # number of initial trees to fit
  step.size = n.trees,                      # numbers of trees to add at each cycle
  max.trees = 10000,                        # max number of trees to fit before stopping
  tolerance.method = "auto",                # method to use in deciding to stop - "fixed" or "auto"
  tolerance = 0.001,                        # tolerance value to use - if method == fixed is absolute, 
                                            # if auto is multiplier * total mean deviance
  keep.data = FALSE,                        # keep raw data in final model
  plot.main = TRUE,                         # plot hold-out deviance curve
  plot.folds = FALSE,                       # plot the individual folds as well
  verbose = TRUE,                           # control amount of screen reporting
  silent = FALSE,                           # to allow running with no output for simplifying model)
  keep.fold.models = FALSE,                 # keep the fold models from cross valiation
  keep.fold.vector = FALSE,                 # allows the vector defining fold membership to be kept
  keep.fold.fit = FALSE,                    # allows the predicted values for observations from CV to be kept
  ...)                                      # allows for any additional plotting parameters
{
#
# j. leathwick/j. elith - 19th September 2005
#
# version 2.9
#
# function to assess optimal no of boosting trees using k-fold cross validation 
#
# implements the cross-validation procedure described on page 215 of 
# Hastie T, Tibshirani R, Friedman JH (2001) The Elements of Statistical Learning: 
# Data Mining, Inference, and Prediction Springer-Verlag, New York.
#
# divides the data into 10 subsets, with stratification by prevalence if required for pa data
# then fits a gbm model of increasing complexity along the sequence from n.trees to n.trees + (n.steps * step.size)
# calculating the residual deviance at each step along the way
# after each fold processed, calculates the average holdout residual deviance and its standard error
# then identifies the optimal number of trees as that at which the holdout deviance is minimised 
# and fits a model with this number of trees, returning it as a gbm model along with additional information 
# from the cv selection process
#
# updated 13/6/05 to accommodate weighting of sites
#
# updated 19/8/05 to increment all folds simultaneously, allowing the stopping rule
# for the maxinum number of trees to be fitted to be imposed by the data, 
# rather than being fixed in advance
#
# updated 29/8/05 to return cv test statistics, and deviance as mean
# time for analysis also returned via unclass(Sys.time())
#
# updated 5/9/05 to use external function calc.deviance 
# and to return cv test stats via predictions formed from fold models 
# with n.trees = target.trees
#
# updated 15/5/06 to calculate variance of fitted and predicted values across folds
# these can be expected to approximate the variance of fitted values
# as would be estimated for example by bootstrapping
# as these will underestimate the true variance 
# they are corrected by multiplying by (n-1)2/n 
# where n is the number of folds
#
# updated 25/3/07 tp allow varying of bag fraction
#
# requires gbm library from Cran
# requires roc and calibration scripts of J Elith
# requires calc.deviance script of J Elith/J Leathwick
#
#

  require(gbm)

  if (silent) verbose <- FALSE

# initiate timing call

  z1 <- unclass(Sys.time())

# setup input data and assign to position one

  dataframe.name <- deparse(substitute(data))   # get the dataframe name

  data <- eval(data)
  x.data <- eval(data[, gbm.x])                 #form the temporary datasets
  names(x.data) <- names(data)[gbm.x]
  y.data <- eval(data[, gbm.y])
  sp.name <- names(data)[gbm.y]
  if (family == "bernoulli") prevalence <- mean(y.data)

  assign("x.data", x.data, env = globalenv())               #and assign them for later use
  assign("y.data", y.data, env = globalenv())

  offset.name <- deparse(substitute(offset))   # get the dataframe name
  offset = eval(offset)
  print(summary(offset))

  n.cases <- nrow(data)
  n.preds <- length(gbm.x)

  if (!silent) {
    cat("\n","\n","GBM STEP - version 2.9","\n","\n")
    cat("Performing cross-validation optimisation of a boosted regression tree model \n")
    cat("for",sp.name,"with dataframe",dataframe.name,"and using a family of",family,"\n\n")
    cat("Using",n.cases,"observations and",n.preds,"predictors \n\n")
  }

# set up the selector variable either with or without prevalence stratification

  if (is.null(fold.vector)) {

    if (prev.stratify & family == "bernoulli") {
      presence.mask <- data[,gbm.y] == 1
      absence.mask <- data[,gbm.y] == 0
      n.pres <- sum(presence.mask)
      n.abs <- sum(absence.mask)

# create a vector of randomised numbers and feed into presences
      selector <- rep(0,n.cases)
      temp <- rep(seq(1, n.folds, by = 1), length = n.pres)
      temp <- temp[order(runif(n.pres, 1, 100))]
      selector[presence.mask] <- temp

# and then do the same for absences
      temp <- rep(seq(1, n.folds, by = 1), length = n.abs)
      temp <- temp[order(runif(n.abs, 1, 100))]
      selector[absence.mask] <- temp
      }  

    else {  #otherwise make them random with respect to presence/absence
      selector <- rep(seq(1, n.folds, by = 1), length = n.cases)
      selector <- selector[order(runif(n.cases, 1, 100))]
      }
    }
  else {
    if (length(fold.vector) != n.cases) stop("supplied fold vector is of wrong length")
    cat("loading user-supplied fold vector \n\n")
    selector <- eval(fold.vector)
    }

# set up the storage space for results

  pred.values <- rep(0, n.cases)

  cv.loss.matrix <- matrix(0, nrow = n.folds, ncol = 1)
  training.loss.matrix <- matrix(0, nrow = n.folds, ncol = 1)
  trees.fitted <- n.trees

  model.list <- list(paste("model",c(1:n.folds),sep=""))     # dummy list for the tree models

# set up the initial call to gbm

  if (is.null(offset)) {
    gbm.call <- paste("gbm(y.subset ~ .,data=x.subset, n.trees = n.trees,
    interaction.depth = tree.complexity, shrinkage = learning.rate, 
    bag.fraction = bag.fraction, weights = weight.subset, 
    distribution = as.character(family), var.monotone = var.monotone, 
    verbose = FALSE)", sep="")
    }
  else {
    gbm.call <- paste("gbm(y.subset ~ . + offset(offset.subset),
    data=x.subset, n.trees = n.trees,
    interaction.depth = tree.complexity, shrinkage = learning.rate, 
    bag.fraction = bag.fraction, weights = weight.subset, 
    distribution = as.character(family), var.monotone = var.monotone, 
    verbose = FALSE)", sep="")
    } 

  n.fitted <- n.trees

# calculate the total deviance
               
  y_i <- y.data

  u_i <- sum(y.data * site.weights) / sum(site.weights)
  u_i <- rep(u_i,length(y_i))

  total.deviance <- calc.deviance(y_i, u_i, weights = site.weights, family = family, calc.mean = FALSE)

  mean.total.deviance <- total.deviance/n.cases

  tolerance.test <- tolerance

  if (tolerance.method == "auto") {
     tolerance.test <- mean.total.deviance * tolerance
  }

# now step through the folds setting up the initial call

  if (!silent){ 

    cat("creating",n.folds,"initial models of",n.trees,"trees","\n")
  
    if (prev.stratify & family == "bernoulli") cat("\n","folds are stratified by prevalence","\n","\n")
      else cat("\n","folds are unstratified","\n","\n")
  
    cat ("total mean deviance = ",round(mean.total.deviance,4),"\n","\n")

    cat("tolerance is fixed at ",round(tolerance.test,4),"\n","\n")

    if (tolerance.method != "fixed" & tolerance.method != "auto") {
      cat("invalid argument for tolerance method - should be auto or fixed","\n")
      return()}
  }

  if (verbose) cat("ntrees resid. dev.","\n")

  for (i in 1:n.folds) {

    model.mask <- selector != i  #used to fit model on majority of data
    pred.mask <- selector == i   #used to identify the with-held subset

    y.subset <- y.data[model.mask]
    x.subset <- x.data[model.mask,]
    weight.subset <- site.weights[model.mask]

    if (!is.null(offset)) {
      offset.subset <- offset[model.mask] 
    }
    else {
      offset.subset <- NULL
    }

    model.list[[i]] <- eval(parse(text = gbm.call))

    fitted.values <- model.list[[i]]$fit  #predict.gbm(model.list[[i]], x.subset, type = "response", n.trees = n.trees)
    if (!is.null(offset)) fitted.values <- fitted.values + offset[model.mask]
    if (family == "bernoulli") fitted.values <- exp(fitted.values)/(1 + exp(fitted.values))
    if (family == "poisson") fitted.values <- exp(fitted.values)

    pred.values[pred.mask] <- predict.gbm(model.list[[i]], x.data[pred.mask, ], n.trees = n.trees)
    if (!is.null(offset)) pred.values[pred.mask] <- pred.values[pred.mask] + offset[pred.mask]
    if (family == "bernoulli") pred.values[pred.mask] <- exp(pred.values[pred.mask])/(1 + exp(pred.values[pred.mask]))
    if (family == "poisson") pred.values[pred.mask] <- exp(pred.values[pred.mask])

# calc training deviance

    y_i <- y.subset
    u_i <- fitted.values
    weight.fitted <- site.weights[model.mask]
    training.loss.matrix[i,1] <- calc.deviance(y_i, u_i, weight.fitted, family = family)

# calc holdout deviance

    y_i <- y.data[pred.mask]
    u_i <- pred.values[pred.mask]
    weight.preds <- site.weights[pred.mask]
    cv.loss.matrix[i,1] <- calc.deviance(y_i, u_i, weight.preds, family = family)

  } # end of first loop

# now process until the change in mean deviance is =< tolerance or max.trees is exceeded

  delta.deviance <- 1

  cv.loss.values <- apply(cv.loss.matrix,2,mean)
  if (verbose) cat(n.fitted,"  ",round(cv.loss.values,4),"\n","\n")

  if (!silent) cat("")
  if (!silent) cat("now adding trees...","\n")

  j <- 1

  while (delta.deviance > tolerance.test & n.fitted < max.trees) {  # beginning of inner loop

# add a new column to the results matrice..

    training.loss.matrix <- cbind(training.loss.matrix,rep(0,n.folds))
    cv.loss.matrix <- cbind(cv.loss.matrix,rep(0,n.folds))

    n.fitted <- n.fitted + step.size
    trees.fitted <- c(trees.fitted,n.fitted)
  
    j <- j + 1
 
    for (i in 1:n.folds) {

      model.mask <- selector != i  #used to fit model on majority of data
      pred.mask <- selector == i   #used to identify the with-held subset

      y.subset <- y.data[model.mask]
      x.subset <- x.data[model.mask,]
      weight.subset <- site.weights[model.mask]
      if (!is.null(offset)) {
        offset.subset <- offset[model.mask] 
      }

      model.list[[i]] <- gbm.more(model.list[[i]], weights = weight.subset, step.size)

      fitted.values <- model.list[[i]]$fit # predict.gbm(model.list[[i]],x.subset, type = "response", n.trees = n.fitted) 
      if (!is.null(offset)) fitted.values <- fitted.values + offset[model.mask]
      if (family == "bernoulli") fitted.values <- exp(fitted.values)/(1 + exp(fitted.values))
      if (family == "poisson") fitted.values <- exp(fitted.values)

      pred.values[pred.mask] <- predict.gbm(model.list[[i]], x.data[pred.mask, ], n.trees = n.fitted)
      if (!is.null(offset)) pred.values[pred.mask] <- pred.values[pred.mask] + offset[pred.mask]
      if (family == "bernoulli") pred.values[pred.mask] <- exp(pred.values[pred.mask])/(1 + exp(pred.values[pred.mask]))
      if (family == "poisson") pred.values[pred.mask] <- exp(pred.values[pred.mask])
   
# calculate training deviance

      y_i <- y.subset
      u_i <- fitted.values
      weight.fitted <- site.weights[model.mask]
      training.loss.matrix[i,j] <- calc.deviance(y_i, u_i, weight.fitted, family = family)

# calc holdout deviance

      u_i <- pred.values[pred.mask]
      y_i <- y.data[pred.mask]
      weight.preds <- site.weights[pred.mask]
      cv.loss.matrix[i,j] <- calc.deviance(y_i, u_i, weight.preds, family = family)

    }  # end of inner loop

    cv.loss.values <- apply(cv.loss.matrix,2,mean)

    if (j < 5) {
      if (cv.loss.values[j] > cv.loss.values[j-1]) {
        if (!silent) cat("restart model with a smaller learning rate or smaller step size...")
        return()
      }
    }

      if (j >= 20) {   #calculate stopping rule value
        test1 <- mean(cv.loss.values[(j-9):j])
        test2 <- mean(cv.loss.values[(j-19):(j-9)])
        delta.deviance <- test2 - test1
      }

      if (verbose) cat(n.fitted," ",round(cv.loss.values[j],4),"\n")    

  } # end of while loop

# now begin process of calculating optimal number of trees

  training.loss.values <- apply(training.loss.matrix,2,mean)

  cv.loss.ses <- rep(0,length(cv.loss.values))
  cv.loss.ses <- sqrt(apply(cv.loss.matrix,2,var)) / sqrt(n.folds)

# find the target holdout deviance

  y.bar <- min(cv.loss.values) 

# plot out the resulting curve of holdout deviance 

  if (plot.main) {

    y.min <- min(cv.loss.values - cv.loss.ses)  #je added multiplier 10/8/05
    y.max <- max(cv.loss.values + cv.loss.ses)  #je added multiplier 10/8/05 }

    if (plot.folds) {
      y.min <- min(cv.loss.matrix)
      y.max <- max(cv.loss.matrix) }

      plot(trees.fitted, cv.loss.values, type = 'l', ylab = "holdout deviance",
          xlab = "no. of trees", ylim = c(y.min,y.max), ...)
      abline(h = y.bar, col = 2)

      lines(trees.fitted, cv.loss.values + cv.loss.ses, lty=2)  
      lines(trees.fitted, cv.loss.values - cv.loss.ses, lty=2)  

      if (plot.folds) {
        for (i in 1:n.folds) {
          lines(trees.fitted, cv.loss.matrix[i,],lty = 3)
      }
    }
  }
    
# identify the optimal number of trees 

  target.trees <- trees.fitted[match(TRUE,cv.loss.values == y.bar)]

  if(plot.main) {
    abline(v = target.trees, col=3)
    title(paste(sp.name,", d - ",tree.complexity,", lr - ",learning.rate, sep=""))
  }

# estimate the cv deviance and test statistics
# includes estimates of the standard error of the fitted values added 2nd may 2005

  cv.deviance.stats <- rep(0, n.folds)
  cv.roc.stats <- rep(0, n.folds)
  cv.cor.stats <- rep(0, n.folds)
  cv.calibration.stats <- matrix(0, ncol=5, nrow = n.folds)
  if (family == "bernoulli") threshold.stats <- rep(0, n.folds)

  fitted.matrix <- matrix(NA, nrow = n.cases, ncol = n.folds)  # used to calculate se's
  fold.fit <- rep(0,n.cases)

  for (i in 1:n.folds) {

    pred.mask <- selector == i   #used to identify the with-held subset
    model.mask <- selector != i  #used to fit model on majority of data

    fits <- predict.gbm(model.list[[i]], x.data[model.mask, ], n.trees = target.trees)
    if (!is.null(offset)) fits <- fits + offset[model.mask]
    if (family == "bernoulli") fits <- exp(fits)/(1 + exp(fits))
    if (family == "poisson") fits <- exp(fits)
    fitted.matrix[model.mask,i] <- fits

    fits <- predict.gbm(model.list[[i]], x.data[pred.mask, ], n.trees = target.trees)
    if (!is.null(offset)) fits <- fits + offset[pred.mask]
    fold.fit[pred.mask] <- fits  # store the linear predictor values
    if (family == "bernoulli") fits <- exp(fits)/(1 + exp(fits))
    if (family == "poisson") fits <- exp(fits)
    fitted.matrix[pred.mask,i] <- fits

    y_i <- y.data[pred.mask] 
    u_i <- fitted.matrix[pred.mask,i]  #pred.values[pred.mask]
    weight.preds <- site.weights[pred.mask]

    cv.deviance.stats[i] <- calc.deviance(y_i, u_i, weight.preds, family = family)

    cv.cor.stats[i] <- cor(y_i,u_i)

    if (family == "bernoulli") {
      cv.roc.stats[i] <- roc(y_i,u_i)
      cv.calibration.stats[i,] <- calibration(y_i,u_i,"binomial")
      threshold.stats[i] <- approx(ppoints(u_i), sort(u_i,decreasing = T), prevalence)$y
    }

    if (family == "poisson") {
      cv.calibration.stats[i,] <- calibration(y_i,u_i,"poisson")
    }
  }

  fitted.vars <- apply(fitted.matrix,1, var, na.rm = TRUE)

# now calculate the mean and se's for the folds

  cv.dev <- mean(cv.deviance.stats, na.rm = TRUE)
  cv.dev.se <- sqrt(var(cv.deviance.stats)) / sqrt(n.folds)

  cv.cor <- mean(cv.cor.stats, na.rm = TRUE)
  cv.cor.se <- sqrt(var(cv.cor.stats, use = "complete.obs")) / sqrt(n.folds)

  cv.roc <- 0.0
  cv.roc.se <- 0.0 

  if (family == "bernoulli") {
    cv.roc <- mean(cv.roc.stats,na.rm=TRUE)
    cv.roc.se <- sqrt(var(cv.roc.stats, use = "complete.obs")) / sqrt(n.folds)  
    cv.threshold <- mean(threshold.stats, na.rm = T)
    cv.threshold.se <- sqrt(var(threshold.stats, use = "complete.obs")) / sqrt(n.folds)  
  }
 
  cv.calibration <- 0.0
  cv.calibration.se <- 0.0

  if (family == "poisson" | family == "bernoulli") {
    cv.calibration <- apply(cv.calibration.stats,2,mean)
    cv.calibration.se <- apply(cv.calibration.stats,2,var)
    cv.calibration.se <- sqrt(cv.calibration.se) / sqrt(n.folds) }

# fit the final model

  if (is.null(offset)) {
    gbm.call <- paste("gbm(y.data ~ .,data=x.data, n.trees = target.trees,
    interaction.depth = tree.complexity, shrinkage = learning.rate, 
    bag.fraction = bag.fraction, weights = site.weights, 
    distribution = as.character(family), var.monotone = var.monotone, 
    verbose = FALSE)", sep="")
    }
  else {
    gbm.call <- paste("gbm(y.data ~ . + offset(offset),data=x.data, n.trees = target.trees,
    interaction.depth = tree.complexity, shrinkage = learning.rate, 
    bag.fraction = bag.fraction, weights = site.weights, 
    distribution = as.character(family), var.monotone = var.monotone, 
    verbose = FALSE)", sep="")
    } 

  if (!silent) cat("fitting final gbm model with a fixed number of ",target.trees," trees for ",sp.name,"\n") 

  gbm.object <- eval(parse(text = gbm.call))

  best.trees <- target.trees

#extract fitted values and summary table
  
  gbm.summary <- summary(gbm.object,n.trees = target.trees, plotit = FALSE)

  fits <- predict.gbm(gbm.object,x.data,n.trees = target.trees)
  if (!is.null(offset)) fits <- fits + offset
  if (family == "bernoulli") fits <- exp(fits)/(1 + exp(fits))
  if (family == "poisson") fits <- exp(fits)
  fitted.values <- fits

  y_i <- y.data
  u_i <- fitted.values
  resid.deviance <- calc.deviance(y_i, u_i, weights = site.weights, family = family, calc.mean = FALSE)

  self.cor <- cor(y_i,u_i)
  self.calibration <- 0.0
  self.roc <- 0.0

  if (family == "bernoulli") {  # do this manually as we need the residuals
    deviance.contribs <- (y_i * log(u_i)) + ((1-y_i) * log(1 - u_i))
    residuals <- sqrt(abs(deviance.contribs * 2))
    residuals <- ifelse((y_i - u_i) < 0, 0 - residuals, residuals)
    self.roc <- roc(y_i,u_i)
    self.calibration <- calibration(y_i,u_i,"binomial")
  }

  if (family == "poisson") {   # do this manually as we need the residuals
    deviance.contribs <- ifelse(y_i == 0, 0, (y_i * log(y_i/u_i))) - (y_i - u_i)
    residuals <- sqrt(abs(deviance.contribs * 2))
    residuals <- ifelse((y_i - u_i) < 0, 0 - residuals, residuals)
    self.calibration <- calibration(y_i,u_i,"poisson")
  }

  if (family == "gaussian" | family == "laplace") {
    residuals <- y_i - u_i
  }

  mean.resid.deviance <- resid.deviance/n.cases

  z2 <- unclass(Sys.time())
  elapsed.time.minutes <- round((z2 - z1)/ 60,2)  #calculate the total elapsed time

  if (verbose) {
    cat("\n")
    cat("mean total deviance =", round(mean.total.deviance,3),"\n")
    cat("mean residual deviance =", round(mean.resid.deviance,3),"\n","\n")
    cat("estimated cv deviance =", round(cv.dev,3),"; se =",
      round(cv.dev.se,3),"\n","\n")
    cat("training data correlation =",round(self.cor,3),"\n")
    cat("cv correlation = ",round(cv.cor,3),"; se =",round(cv.cor.se,3),"\n","\n")
    if (family == "bernoulli") {
      cat("training data ROC score =",round(self.roc,3),"\n")
      cat("cv ROC score =",round(cv.roc,3),"; se =",round(cv.roc.se,3),"\n","\n")
    }
    cat("elapsed time - ",round(elapsed.time.minutes,2),"minutes","\n")
  }

  if (n.fitted == max.trees & !silent) {
    cat("\n","########### warning ##########","\n","\n")
    cat("maximum tree limit reached - results may not be optimal","\n")
    cat("  - refit with faster learning rate or increase maximum number of trees","\n") 
  }
    
# now assemble data to be returned

  gbm.detail <- list(dataframe = dataframe.name, gbm.x = gbm.x, predictor.names = names(x.data), 
    gbm.y = gbm.y, response.name = sp.name, offset = offset.name, family = family, tree.complexity = tree.complexity, 
    learning.rate = learning.rate, bag.fraction = bag.fraction, cv.folds = n.folds, 
    prev.stratification = prev.stratify, max.fitted = n.fitted, n.trees = target.trees, 
    best.trees = target.trees, train.fraction = 1.0, tolerance.method = tolerance.method, 
    tolerance = tolerance, var.monotone = var.monotone, date = date(), 
    elapsed.time.minutes = elapsed.time.minutes)

  training.stats <- list(null = total.deviance, mean.null = mean.total.deviance, 
    resid = resid.deviance, mean.resid = mean.resid.deviance, correlation = self.cor, 
    discrimination = self.roc, calibration = self.calibration)

  cv.stats <- list(deviance.mean = cv.dev, deviance.se = cv.dev.se, 
    correlation.mean = cv.cor, correlation.se = cv.cor.se,
    discrimination.mean = cv.roc, discrimination.se = cv.roc.se,
    calibration.mean = cv.calibration, calibration.se = cv.calibration.se)

  if (family == "bernoulli") {
    cv.stats$cv.threshold <- cv.threshold
    cv.stats$cv.threshold.se <- cv.threshold.se
  }

  rm(x.data,y.data, envir = globalenv())           #finally, clean up the temporary dataframes

# and assemble results for return

  gbm.object$gbm.call <- gbm.detail
  gbm.object$fitted <- fitted.values
  gbm.object$fitted.vars <- fitted.vars
  gbm.object$residuals <- residuals
  gbm.object$contributions <- gbm.summary
  gbm.object$self.statistics <- training.stats
  gbm.object$cv.statistics <- cv.stats
  gbm.object$weights <- site.weights
  gbm.object$trees.fitted <- trees.fitted
  gbm.object$training.loss.values <- training.loss.values
  gbm.object$cv.values <- cv.loss.values
  gbm.object$cv.loss.ses <- cv.loss.ses
  gbm.object$cv.loss.matrix <- cv.loss.matrix
  gbm.object$cv.roc.matrix <- cv.roc.stats

  if (keep.fold.models) gbm.object$fold.models <- model.list
  else gbm.object$fold.models <- NULL

  if (keep.fold.vector) gbm.object$fold.vector <- selector
  else gbm.object$fold.vector <- NULL

  if (keep.fold.fit) gbm.object$fold.fit <- fold.fit
  else gbm.object$fold.fit <- NULL

  return(gbm.object)  
}

"gbm.fixed" <-
function (data,                        # the input dataframe
  gbm.x,                               # indices of the predictors in the input dataframe
  gbm.y,                               # index of the response in the input dataframe
  tree.complexity = 1,                 # the tree depth - sometimes referred to as interaction depth
  site.weights = rep(1, nrow(data)),   # by default set equal
  verbose = TRUE,                      # to control reporting
  learning.rate = 0.001,               # controls speed of the gradient descent
  n.trees = 2000,                      # default number of trees
  train.fraction = 1,  
  bag.fraction = 0.5,                  # varies random sample size for each new tree
  family = "bernoulli",                # can be any of bernoulli, poisson, gaussian, laplace - note quotes
  keep.data = FALSE,                   # keep original data
  var.monotone = rep(0, length(gbm.x)) # constrain to positive (1) or negative monontone (-1)
  ) 
{
#
# j leathwick, j elith - 6th May 2007
#
# version 2.9 - developed in R 2.0
#
# calculates a gradient boosting (gbm)object with a fixed number of trees
# with the number of trees identified using gbm.step or some other procedure
#
# mostly used as a utility function, e.g., when being called by gbm.simplify
#
# takes as input a dataset and args selecting x and y variables, learning rate and tree complexity
#
# updated 13/6/05 to accommodate weighting of sites when calculating total and residual deviance
#
# updated 10/8/05 to correct how site.weights are returned
#
# requires gbm
#
#
  require(gbm)

# setup input data and assign to position one

  dataframe.name <- deparse(substitute(data))   # get the dataframe name

  x.data <- eval(data[, gbm.x])                 #form the temporary datasets
  names(x.data) <- names(data)[gbm.x]
  y.data <- eval(data[, gbm.y])
  sp.name <- names(data)[gbm.y]
    

  assign("x.data", x.data, pos = 1)             #and assign them for later use
  assign("y.data", y.data, pos = 1)

# fit the gbm model 

  z1 <- unclass(Sys.time())

  gbm.call <- paste("gbm(y.data ~ .,n.trees = n.trees, data=x.data, verbose = F, interaction.depth = tree.complexity, 
    weights = site.weights, shrinkage = learning.rate, distribution = as.character(family),
    var.monotone = var.monotone, bag.fraction = bag.fraction, keep.data = keep.data)", sep="")

  if (verbose) {
    print(paste("fitting gbm model with a fixed number of ",n.trees," trees for ",sp.name,sep=""),quote=FALSE) }

  gbm.object <- eval(parse(text = gbm.call))

  best.trees <- n.trees

#extract fitted values and summary table
  
  fitted.values <- predict.gbm(gbm.object,x.data,n.trees = n.trees,type="response")
  gbm.summary <- summary(gbm.object,n.trees = n.trees, plotit = FALSE)

  y_i <- y.data
  u_i <- fitted.values

  if (family == "poisson") {
    deviance.contribs <- ifelse(y_i == 0, 0, (y_i * log(y_i/u_i))) - (y_i - u_i)
    resid.deviance <- 2 * sum(deviance.contribs * site.weights)
    residuals <- sqrt(abs(deviance.contribs * 2))
    residuals <- ifelse((y_i - u_i) < 0, 0 - residuals, residuals)

    u_i <- sum(y.data * site.weights) / sum(site.weights)
    deviance.contribs <- ifelse(y_i == 0, 0, (y_i * log(y_i/u_i))) - (y_i - u_i)
    total.deviance <- 2 * sum(deviance.contribs * site.weights)
  }

  if (family == "bernoulli") {
    deviance.contribs <- (y_i * log(u_i)) + ((1-y_i) * log(1 - u_i))
    resid.deviance <- -2 * sum(deviance.contribs * site.weights)
    residuals <- sqrt(abs(deviance.contribs * 2))
    residuals <- ifelse((y_i - u_i) < 0, 0 - residuals, residuals)

    u_i <- sum(y.data * site.weights) / sum(site.weights)
    deviance.contribs <- (y_i * log(u_i)) + ((1-y_i) * log(1 - u_i))
    total.deviance <- -2 * sum(deviance.contribs * site.weights)
  }

  if (family == "laplace") {
    resid.deviance <- sum(abs(y_i - u_i))
    residuals <- y_i - u_i
    u_i <- mean(y.data)
    total.deviance <- sum(abs(y_i - u_i))
  }

  if (family == "gaussian") {
    resid.deviance <- sum((y_i - u_i) * (y_i - u_i))
    residuals <- y_i - u_i
    u_i <- mean(y.data)
    total.deviance <- sum((y_i - u_i) * (y_i - u_i))
  }

  if (verbose) {
    print(paste("total deviance = ",round(total.deviance,2),sep=""),quote=F)
    print(paste("residual deviance = ",round(resid.deviance,2),sep=""),quote=F)}

# now assemble data to be returned

  z2 <- unclass(Sys.time())
  elapsed.time.minutes <- round((z2 - z1)/ 60,2)  #calculate the total elapsed time

  gbm.detail <- list(dataframe = dataframe.name, gbm.x = gbm.x, predictor.names = names(x.data), 
    gbm.y = gbm.y, reponse.name = names(y.data), family = family, tree.complexity = tree.complexity, 
    learning.rate = learning.rate, bag.fraction = bag.fraction, cv.folds = 0, n.trees = n.trees, best.trees = best.trees, 
    train.fraction = train.fraction, var.monotone = var.monotone, date = date(), elapsed.time.minutes = elapsed.time.minutes)

  gbm.object$gbm.call <- gbm.detail
  gbm.object$fitted <- fitted.values
  gbm.object$residuals <- residuals
  gbm.object$contributions <- gbm.summary
  gbm.object$self.statistics <- list(null.deviance = total.deviance, resid.deviance = resid.deviance)
  gbm.object$weights <- site.weights

  rm(x.data,y.data, pos=1)           #finally, clean up the temporary dataframes

  return(gbm.object)
}


"gbm.holdout" <-
function (data,                        # the input data frame
   gbm.x,                              # indices of predictor variables
   gbm.y,                              # index of response variable
   learning.rate = 0.001,              # typically varied between 0.1 and 0.001
   tree.complexity = 1,                # sometimes called interaction depth
   family = "bernoulli",               # "bernoulli","poisson", etc. as for gbm
   n.trees = 200,                      # initial number of trees
   add.trees = n.trees,                # number of trees to add at each increment
   max.trees = 20000,                  # maximum number of trees to fit
   verbose = TRUE,                     # controls degree of screen reporting
   train.fraction = 0.8,               # proportion of data to use for training
   permute = TRUE,                     # reorder data to start with
   prev.stratify = TRUE,               # stratify selection for p/a data
   var.monotone = rep(0, length(gbm.x)),# allows constraining of response to monotone 
   site.weights = rep(1, nrow(data)),  # set equal to 1 by default
   refit = TRUE,                       # refit the model with the full data but id'd no of trees
   keep.data = TRUE)                   # keep copy of the data
{
#
# j leathwick, j elith - October 2006
#
# version 2.9 - developed in R 2.3.1
#
# calculates a gradient boosting (gbm)object in which model complexity is 
# determined using a training set with predictions made to a withheld set
# an initial set of trees is fitted, and then trees are progressively added
# testing performance # along the way, using gbm.perf until the optimal
# number of trees is identified
#
# as any structured ordering of the data should be avoided, a copy of the data set 
# BY DEFAULT is randomly reordered each time the function is run
#
# takes as input a dataset and args selecting x and y variables, and degree of interaction depth
#
# requires gbm
#
#
  require(gbm)

# setup input data and assign to position one

  dataframe.name <- deparse(substitute(data))  # get the dataframe name
  cv.folds <- 0 

  if (permute) { 
    print("",quote=FALSE)
    print("WARNING - data is being randomly reordered to avoid confounding effects",quote=FALSE)
    print("of inherent structure as submitted - use permute = FALSE to turn off this option",quote=FALSE)
    n.rows <- nrow(data)

    if (prev.stratify == TRUE & family == "bernoulli") {

      presence.mask <- data[,gbm.y] == 1
      absence.mask <- data[,gbm.y] == 0
      n.pres <- sum(presence.mask)
      n.abs <- sum(absence.mask)

      selector <- seq(1,n.rows)

      temp <- sample(selector[presence.mask],size = n.pres * train.fraction)
      selector[temp] <- 0

      temp <- sample(selector[absence.mask],size = n.abs * train.fraction)
      selector[temp] <- 0 

      sort.vector <- sort(selector,index.return = TRUE)[[2]]
    }

    else {
      sort.vector <- sample(seq(1,n.rows),n.rows,replace=FALSE)
    }
            
    sort.data <- data[sort.vector,]

    x.data <- eval(sort.data[, gbm.x])                 #form the temporary datasets
    y.data <- eval(sort.data[, gbm.y])
  }
  else {
    x.data <- eval(data[, gbm.x])                 #form the temporary datasets
    y.data <- eval(data[, gbm.y])
  }

  names(x.data) <- names(data)[gbm.x]
  sp.name <- names(data)[gbm.y]
    
  assign("x.data", x.data, pos = 1)               #and assign them for later use
  assign("y.data", y.data, pos = 1)

# fit the gbm model 

  print(paste("fitting initial gbm model of ",n.trees," trees for ",sp.name,sep=""),quote=FALSE)
  print(" and expanding using withheld data for evaluation",quote=FALSE) 

  gbm.call <- paste("gbm(y.data ~ .,n.trees = n.trees, data=x.data, verbose = F, interaction.depth = tree.complexity, 
    weights = site.weights, shrinkage = learning.rate, cv.folds = 0, distribution = as.character(family),
    train.fraction = train.fraction, var.monotone = var.monotone, keep.data = keep.data)", sep="")

  gbm.object <- eval(parse(text = gbm.call))

# identify the best number of trees using method appropriate to model

  best.trees <- gbm.perf(gbm.object, method = 'test', plot.it = FALSE) 

  n.fitted <- n.trees

  if (verbose) print("expanding model to find optimal no of trees...",quote=FALSE)

  while(gbm.object$n.trees - best.trees < n.trees & n.fitted < max.trees){

    gbm.object <- gbm.more(gbm.object, add.trees)
    best.trees <- gbm.perf(gbm.object, method = 'test', plot.it = FALSE)
    n.fitted <- n.fitted + add.trees

    if (n.fitted %% 100 == 0){ #report times along the way
      if (verbose) print(paste("fitted trees = ", n.fitted, sep = ""), quote = FALSE)
    }
  }

  if (verbose) print(paste("fitting stopped at ",best.trees," trees",sep=""),quote=FALSE)

  if (refit) {  # we are refitting the model with fixed tree size
    print(paste("refitting the model to the full dataset using ",best.trees," trees",sep=""),quote=FALSE)

    x.data <- eval(data[, gbm.x])                 #form the temporary datasets
    y.data <- eval(data[, gbm.y])
     
    gbm.call <- eval(paste("gbm(y.data ~ .,n.trees = best.trees, data=x.data, verbose = F, interaction.depth = tree.complexity, 
      weights = site.weights, shrinkage = learning.rate, cv.folds = 0, distribution = as.character(family),
      var.monotone = var.monotone, keep.data = keep.data)", sep=""))

    gbm.object <- eval(parse(text = gbm.call))

  }

#extract fitted values and summary table
  
  fitted.values <- predict.gbm(gbm.object,x.data,n.trees = best.trees,type="response")
  gbm.summary <- summary(gbm.object,n.trees = best.trees, plotit = FALSE)

  y_i <- y.data
  u_i <- fitted.values

  if (family == "poisson") {
    deviance.contribs <- ifelse(y_i == 0, 0, (y_i * log(y_i/u_i))) - (y_i - u_i)
    resid.deviance <- 2 * sum(deviance.contribs)
    residuals <- sqrt(abs(deviance.contribs * 2))
    residuals <- ifelse((y_i - u_i) < 0, 0 - residuals, residuals)
    u_i <- mean(y.data)
    total.deviance <- 2 * sum(ifelse(y_i == 0, 0, (y_i * log(y_i/u_i))) - (y_i - u_i))
  }

  if (family == "bernoulli") {
    deviance.contribs <- (y_i * log(u_i)) + ((1-y_i) * log(1 - u_i))
    resid.deviance <- -2 * sum(deviance.contribs)
    residuals <- sqrt(abs(deviance.contribs * 2))
    residuals <- ifelse((y_i - u_i) < 0, 0 - residuals, residuals)
    u_i <- mean(y.data)
    total.deviance <- -2 * sum((y_i * log(u_i)) + ((1-y_i) * log(1 - u_i)))
  }

  if (verbose) {
    print(paste("total deviance = ",round(total.deviance,2),sep=""),quote=F)
    print(paste("residual deviance = ",round(resid.deviance,2),sep=""),quote=F)
  }

# now assemble data to be returned

  gbm.detail <- list(dataframe = dataframe.name, gbm.x = gbm.x, predictor.names = names(x.data), 
    gbm.y = gbm.y, response.name = sp.name, tree.complexity = tree.complexity, n.trees = best.trees, 
    learning.rate = learning.rate, best.trees = best.trees, cv.folds = cv.folds, 
    family = family, train.fraction = train.fraction, var.monotone = var.monotone )

  gbm.object$fitted <- fitted.values
  gbm.object$residuals <- residuals
  gbm.object$contributions <- gbm.summary
  gbm.object$deviances <- list(null.deviance = total.deviance, resid.deviance = resid.deviance) 
  gbm.object$weights <- weights
  gbm.object$gbm.call <- gbm.detail

  rm(x.data,y.data, pos=1)           #finally, clean up the temporary dataframes

  return(gbm.object)
}


"gbm.plot" <-
function(gbm.object,                # a gbm object - could be one from gbm.step
     variable.no = 0,               # the var to plot - if zero then plots all
     nt = gbm.object$n.trees,       # how many trees to use
     smooth = FALSE,                # should we add a smoothed version of the fitted function 
     rug = T,                       # plot a rug of deciles
     n.plots = length(pred.names),  # plot the first n most important preds
     common.scale = T,              # use a common scale on the y axis
     write.title = T,               # plot a title above the plot
     y.label = "fitted function",   # the default y-axis label
     x.label = NULL,                # the default x-axis label
     show.contrib = T,              # show the contribution on the x axis
     plot.layout = c(3,4),          # define the default layout for graphs on the page  
     rug.side = 3,                  # which axis for rug plot? default (3) is top; bottom=1
     rug.lwd = 1,                   # line width for rug plots
     rug.tick = 0.03,               # tick length for rug plots
     ...                            # other arguments to pass to the plotting 
                                    # useful options include cex.axis, cex.lab, etc.
     )
{
# function to plot gbm response variables, with the option
# of adding a smooth representation of the response if requested
# additional options in this version allow for plotting on a common scale
# note too that fitted functions are now centered by subtracting their mean
# 
# version 2.9
#
# j. leathwick/j. elith - March 2007
#

require(gbm)
require(splines)

gbm.call <- gbm.object$gbm.call
gbm.x <- gbm.call$gbm.x
pred.names <- gbm.call$predictor.names
response.name <- gbm.call$response.name
dataframe.name <- gbm.call$dataframe
data <- eval(parse(text = dataframe.name))

max.plots <- plot.layout[1] * plot.layout[2]
plot.count <- 0
n.pages <- 1

if (length(variable.no) > 1) {stop("only one response variable can be plotted at a time")}

if (variable.no > 0) {   #we are plotting all vars in rank order of contribution
  n.plots <- 1
  }

max.vars <- length(gbm.object$contributions$var)
if (n.plots > max.vars) {
  n.plots <- max.vars
  cat("warning - reducing no of plotted predictors to maximum available (",max.vars,")\n",sep="")
  }

predictors <- list(rep(NA,n.plots)) # matrix(0,ncol=n.plots,nrow=100)
responses <- list(rep(NA,n.plots)) # matrix(0,ncol=n.plots,nrow=100)

for (j in c(1:n.plots)) {  #cycle through the first time and get the range of the functions
  if (n.plots == 1) {
    k <- variable.no
  }
  else k <- match(gbm.object$contributions$var[j],pred.names)

  if (is.null(x.label)) var.name <- gbm.call$predictor.names[k]
    else var.name <- x.label

  pred.data <- data[,gbm.call$gbm.x[k]]

  response.matrix <- plot.gbm(gbm.object, i.var = k, n.trees = nt, return.grid = TRUE,...)

  predictors[[j]] <- response.matrix[,1]
  if (is.factor(data[,gbm.call$gbm.x[k]])) {
    predictors[[j]] <- factor(predictors[[j]],levels = levels(data[,gbm.call$gbm.x[k]]))
    }
  responses[[j]] <- response.matrix[,2] - mean(response.matrix[,2])

  if(j == 1) {
    ymin = min(responses[[j]])
    ymax = max(responses[[j]])
    }
  else {
    ymin = min(ymin,min(responses[[j]]))
    ymax = max(ymax,max(responses[[j]]))
    }
  }

# now do the actual plots

  for (j in c(1:n.plots)) {

   if (plot.count == max.plots) {
     plot.count = 0
     n.pages <- n.pages + 1
   }

   if (plot.count == 0) {
#     windows(width = 11, height = 8)
	opar <- par()
    par(mfrow = plot.layout)
   }

    plot.count <- plot.count + 1

    if (n.plots == 1) {
      k <- match(pred.names[variable.no],gbm.object$contributions$var)
      if (show.contrib) {
         x.label <- paste(var.name,"  (",round(gbm.object$contributions[k,2],1),"%)",sep="")
      }
    }
    else {
      k <- match(gbm.object$contributions$var[j],pred.names)
      var.name <- gbm.call$predictor.names[k]
      if (show.contrib) {
         x.label <- paste(var.name,"  (",round(gbm.object$contributions[j,2],1),"%)",sep="")
      }
      else x.label <- var.name
    }

    if (common.scale) {
      plot(predictors[[j]],responses[[j]],ylim=c(ymin,ymax), type='l',
        xlab = x.label, ylab = y.label, ...)
    }
    else {
      plot(predictors[[j]],responses[[j]], type='l', 
        xlab = x.label, ylab = y.label, ...)
    }
    if (smooth & is.vector(predictors[[j]])) {
      temp.lo <- loess(responses[[j]] ~ predictors[[j]], span = 0.3)
      lines(predictors[[j]],fitted(temp.lo), lty = 2, col = 2)
    }
    if (plot.count == 1) {
      if (write.title) {
        title(paste(response.name," - page ",n.pages,sep=""))
      }
      if (rug & is.vector(data[,gbm.call$gbm.x[k]])) {
        rug(quantile(data[,gbm.call$gbm.x[k]], probs = seq(0, 1, 0.1), na.rm = TRUE), side = rug.side, lwd = rug.lwd, ticksize = rug.tick)
      }
    }
    else {
      if (write.title & j == 1) {
        title(response.name)
      }
      if (rug & is.vector(data[,gbm.call$gbm.x[k]])) {
        rug(quantile(data[,gbm.call$gbm.x[k]], probs = seq(0, 1, 0.1), na.rm = TRUE), side = rug.side, lwd = rug.lwd, ticksize = rug.tick)
      }
    }
  }
  par(opar)
  rm(opar)
}


"gbm.perspec" <-
function(gbm.object, 
     x = 1,                # the first variable to be plotted
     y = 2,                # the second variable to be plotted
     pred.means = NULL,    # allows specification of values for other variables
     x.label = NULL,       # allows manual specification of the x label
     x.range = NULL,       # manual range specification for the x variable
     y.label = NULL,       # and y label
     y.range = NULL,       # and the y
     z.range = NULL,       # allows control of the vertical axis
     ticktype = "detailed",# specifiy detailed types - otherwise "simple"
     theta = 55,           # rotation 
     phi=40,               # and elevation
     smooth = "none",      # controls smoothing of the predicted surface
     mask = FALSE,         # controls masking using a sample intensity model
     perspective = TRUE,   # controls whether a contour or perspective plot is drawn
     ...)                  # allows the passing of additional arguments to plotting routine
                           # useful options include shade, ltheta, lphi for controlling illumination
                           # and cex for controlling text size - cex.axis and cex.lab have no effect
{
#
# gbm.perspec version 2.9 April 2007
# J Leathwick/J Elith
#
# takes a gbm boosted regression tree object produced by gbm.step and
# plots a perspective plot showing predicted values for two predictors
# as specified by number using x and y
# values for all other variables are set at their mean by default
# but values can be specified by giving a list consisting of the variable name
# and its desired value, e.g., c(name1 = 12.2, name2 = 57.6)

  require(gbm)
  require(splines)

#get the boosting model details

  gbm.call <- gbm.object$gbm.call
  gbm.x <- gbm.call$gbm.x
  n.preds <- length(gbm.x)
  gbm.y <- gbm.call$gbm.y
  pred.names <- gbm.call$predictor.names
  family = gbm.call$family

  x.name <- gbm.call$predictor.names[x]

  if (is.null(x.label)) {
    x.label <- gbm.call$predictor.names[x]}    

  y.name <- gbm.call$predictor.names[y]

  if (is.null(y.label)) {
    y.label <- gbm.call$predictor.names[y]}

  data <- eval(parse(text=gbm.call$dataframe))[,gbm.x]  
  n.trees <- gbm.call$best.trees

  if (is.null(x.range)) {
    x.var <- seq(min(data[,x],na.rm=T),max(data[,x],na.rm=T),length = 50)
  }
  else {x.var <- seq(x.range[1],x.range[2],length = 50)}

  if (is.null(y.range)) {
    y.var <- seq(min(data[,y],na.rm=T),max(data[,y],na.rm=T),length = 50)
  }
  else {y.var <- seq(y.range[1],y.range[2],length = 50)}

  pred.frame <- expand.grid(list(x.var,y.var))
  names(pred.frame) <- c(x.name,y.name)

  j <- 3
  for (i in 1:n.preds) {
    if (i != x & i != y) {
      if (is.vector(data[,i])) {
        m <- match(pred.names[i],names(pred.means))
        if (is.na(m)) {
          pred.frame[,j] <- mean(data[,i],na.rm=T)
        }
        else pred.frame[,j] <- pred.means[m]
      }
      if (is.factor(data[,i])) {
        m <- match(pred.names[i],names(pred.means))
        temp.table <- table(data[,i])
        if (is.na(m)) {
          pred.frame[,j] <- rep(names(temp.table)[2],2500)
        }
        else pred.frame[,j] <- pred.means[m]
        pred.frame[,j] <- factor(pred.frame[,j],levels=names(temp.table))
      }
      names(pred.frame)[j] <- pred.names[i]
      j <- j + 1
     }
  }
#
# form the prediction
#
  prediction <- predict.gbm(gbm.object,pred.frame,n.trees = n.trees, type="response")

# model smooth if required

  if (smooth == "model") {
    pred.glm <- glm(prediction ~ ns(pred.frame[,1], df = 8) * ns(pred.frame[,2], df = 8), data=pred.frame,family=poisson)
    prediction <- fitted(pred.glm)
  }

# report the maximum value and set up realistic ranges for z

  max.pred <- max(prediction)
  cat("maximum value = ",round(max.pred,2),"\n")

  if (is.null(z.range)) {
    if (family == "bernoulli") {
      z.range <- c(0,1)
    }
    else if (family == "poisson") {
      z.range <- c(0,max.pred * 1.1)
    }
    else {
      z.min <- min(data[,y],na.rm=T)
      z.max <- max(data[,y],na.rm=T)
      z.delta <- z.max - z.min
      z.range <- c(z.min - (1.1 * z.delta), z.max + (1.1 * z.delta))
    }
  }
# form the matrix

  pred.matrix <- matrix(prediction,ncol=50,nrow=50)

# kernel smooth if required

  if (smooth == "average") {  #apply a 3 x 3 smoothing average
     pred.matrix.smooth <- pred.matrix
     for (i in 2:49) {
       for (j in 2:49) {
         pred.matrix.smooth[i,j] <- mean(pred.matrix[c((i-1):(i+1)),c((j-1):(j+1))])
       }
     }
  pred.matrix <- pred.matrix.smooth
  }

# mask out values inside hyper-rectangle but outside of sample space

  if (mask) {
    mask.trees <- mask.object$gbm.call$best.trees
    point.prob <- predict.gbm(mask.object[[1]],pred.frame, n.trees = mask.trees, type="response")
    point.prob <- matrix(point.prob,ncol=50,nrow=50)
    pred.matrix[point.prob < 0.5] <- 0.0
  }
#
# and finally plot the result
#
  if (!perspective) {
    image(x = x.var, y = y.var, z = pred.matrix, zlim = z.range)
  }
  else {
    persp(x=x.var, y=y.var, z=pred.matrix, zlim= z.range,      # input vars
      xlab = x.label, ylab = y.label, zlab = "fitted value",   # labels
      theta=theta, phi=phi, r = sqrt(10), d = 3,               # viewing pars
      ticktype = ticktype, mgp = c(4,1,0), ...) #
  }
}

"calibration" <-
function(obs, preds, family = "binomial")
{
#
# j elith/j leathwick 17th March 2005
# calculates calibration statistics for either binomial or count data
# but the family argument must be specified for the latter 
# a conditional test for the latter will catch most failures to specify
# the family
#

if (family == "bernoulli") family <- "binomial"
pred.range <- max(preds) - min(preds)
if(pred.range > 1.2 & family == "binomial") {
print(paste("range of response variable is ", round(pred.range, 2)), sep = "", quote = F)
print("check family specification", quote = F)
return()
}
if(family == "binomial") {
pred <- preds + 1e-005
pred[pred >= 1] <- 0.99999
mod <- glm(obs ~ log((pred)/(1 - (pred))), family = binomial)
lp <- log((pred)/(1 - (pred)))
a0b1 <- glm(obs ~ offset(lp) - 1, family = binomial)
miller1 <- 1 - pchisq(a0b1$deviance - mod$deviance, 2)
ab1 <- glm(obs ~ offset(lp), family = binomial)
miller2 <- 1 - pchisq(a0b1$deviance - ab1$deviance, 1)
miller3 <- 1 - pchisq(ab1$deviance - mod$deviance, 1)
}
if(family == "poisson") {
mod <- glm(obs ~ log(preds), family = poisson)
lp <- log(preds)
a0b1 <- glm(obs ~ offset(lp) - 1, family = poisson)
miller1 <- 1 - pchisq(a0b1$deviance - mod$deviance, 2)
ab1 <- glm(obs ~ offset(lp), family = poisson)
miller2 <- 1 - pchisq(a0b1$deviance - ab1$deviance, 1)
miller3 <- 1 - pchisq(ab1$deviance - mod$deviance, 1)
}
calibration.result <- c(mod$coef, miller1, miller2, miller3)
names(calibration.result) <- c("intercept", "slope", "testa0b1", "testa0|b1", "testb1|a")
return(calibration.result)
}

"roc" <-
function (obsdat, preddat) 
{
# code adapted from Ferrier, Pearce and Watson's code, by J.Elith
#
# see:
# Hanley, J.A. & McNeil, B.J. (1982) The meaning and use of the area
# under a Receiver Operating Characteristic (ROC) curve.
# Radiology, 143, 29-36
#
# Pearce, J. & Ferrier, S. (2000) Evaluating the predictive performance
# of habitat models developed using logistic regression.
# Ecological Modelling, 133, 225-245.
# this is the non-parametric calculation for area under the ROC curve, 
# using the fact that a MannWhitney U statistic is closely related to
# the area
#
    if (length(obsdat) != length(preddat)) 
        stop("obs and preds must be equal lengths")
    n.x <- length(obsdat[obsdat == 0])
    n.y <- length(obsdat[obsdat == 1])
    xy <- c(preddat[obsdat == 0], preddat[obsdat == 1])
    rnk <- rank(xy)
    wilc <- ((n.x * n.y) + ((n.x * (n.x + 1))/2) - sum(rnk[1:n.x]))/(n.x * 
        n.y)
    return(round(wilc, 4))
}

"calc.deviance" <-
function(obs.values, fitted.values, weights = rep(1,length(obs.values)), family="binomial", calc.mean = TRUE)
{
# j. leathwick/j. elith
#
# version 2.1 - 5th Sept 2005
#
# function to calculate deviance given two vectors of raw and fitted values
# requires a family argument which is set to binomial by default
#
#

if (length(obs.values) != length(fitted.values)) 
   stop("observations and predictions must be of equal length")

y_i <- obs.values

u_i <- fitted.values
 
if (family == "binomial" | family == "bernoulli") {
 
   deviance.contribs <- (y_i * log(u_i)) + ((1-y_i) * log(1 - u_i))
   deviance <- -2 * sum(deviance.contribs * weights)

}

if (family == "poisson" | family == "Poisson") {

    deviance.contribs <- ifelse(y_i == 0, 0, (y_i * log(y_i/u_i))) - (y_i - u_i)
    deviance <- 2 * sum(deviance.contribs * weights)

}

if (family == "laplace") {
    deviance <- sum(abs(y_i - u_i))
    }

if (family == "gaussian") {
    deviance <- sum((y_i - u_i) * (y_i - u_i))
    }
    

if (calc.mean) deviance <- deviance/length(obs.values)

return(deviance)

}

"gbm.interactions" <-
function(gbm.object, 
   use.weights = FALSE,   	# use weights for samples 
   mask.object)       		# a gbm object describing sample intensity 
{
#
# gbm.interactions version 2.9
#
# j. leathwick, j. elith - May 2007
#
# functions assesses the magnitude of 2nd order interaction effects 
# in gbm models fitted with interaction depths greater than 1
# this is achieved by:
#   1. forming predictions on the linear scale for each predictor pair;
#   2. fitting a linear model that relates these predictions to the predictor
#        pair, with the the predictors fitted as factors;
#   3. calculating the mean value of the residuals, the magnitude of which
#        increases with the strength of any interaction effect;
#   4. results are stored in an array;
#   5. finally, the n most important interactions are identified,
#        where n is 25% of the number of interaction pairs;

  require(gbm)

  gbm.call <- gbm.object$gbm.call
  n.trees <- gbm.call$best.trees
  depth <- gbm.call$interaction.depth
  gbm.x <- gbm.call$gbm.x
  n.preds <- length(gbm.x)
  pred.names <- gbm.object$gbm.call$predictor.names
  cross.tab <- matrix(0,ncol=n.preds,nrow=n.preds)
  dimnames(cross.tab) <- list(pred.names,pred.names)

  if (use.weights) mask.trees <- mask.object$gbm.call$best.trees

  cat("gbm.interactions - version 2.9 \n")
  cat("Cross tabulating interactions for gbm model with ",n.preds," predictors","\n",sep="")

  data <- eval(parse(text=gbm.call$dataframe))[,gbm.x]  

  for (i in 1:(n.preds - 1)) {  # step through the predictor set

    if (is.vector(data[,i])) {  # create a sequence through the range
       x.var <- seq(min(data[,i],na.rm=T),max(data[,i],na.rm=T),length = 20)
       }
    else {                      # otherwise set up simple factor variable
       x.var <- factor(names(table(data[,i])),levels = levels(data[,i]))
       }
    x.length <- length(x.var)

    cat(i,"\n")

    for (j in (i+1):n.preds) { #create vector or factor data for second variable
      
      if (is.vector(data[,j])) {
        y.var <- seq(min(data[,j],na.rm=T),max(data[,j],na.rm=T),length = 20)
      }
      else {
        y.var <- factor(names(table(data[,j])),levels = levels(data[,j]))
      }
      y.length <- length(y.var)

# and now make a temporary data frame

      pred.frame <- expand.grid(list(x.var,y.var))
      names(pred.frame) <- c(pred.names[i],pred.names[j])

      n <- 3 # and add the balance of the variables to it

      for (k in 1:n.preds) {
        if (k != i & k != j) {
          if (is.vector(data[,k])) {  # either with the mean
            pred.frame[,n] <- mean(data[,k],na.rm=T)
          }
          else {   # or the most common factor level
            temp.table <- sort(table(data[,k]),decreasing = TRUE)
            pred.frame[,n] <- rep(names(temp.table)[1],x.length * y.length)
            pred.frame[,n] <- as.factor(pred.frame[,n])
          }
          names(pred.frame)[n] <- pred.names[k]
          n <- n + 1
        }
      }
#
# form the prediction
#
      prediction <- predict.gbm(gbm.object,pred.frame,n.trees = n.trees, type="link")

      if (use.weights) {
        point.prob <- predict.gbm(mask.object[[1]],pred.frame, n.trees = mask.trees, type="response")
        interaction.test.model <- lm(prediction ~ as.factor(pred.frame[,1]) + as.factor(pred.frame[,2]), 
          weights = point.prob)
      }
  
      else {

        interaction.test.model <- lm(prediction ~ as.factor(pred.frame[,1]) + as.factor(pred.frame[,2]))
      }
        
      interaction.flag <- round(mean(resid(interaction.test.model)^2) * 1000,2)

      cross.tab[i,j] <- interaction.flag

    }   # end of j loop
  }  # end of i loop

# create an index of the values in descending order

  search.index <- ((n.preds^2) + 1) - rank(cross.tab, ties.method = "first")

  n.important <- max(2,round(0.1 * ((n.preds^2)/2),0))
  var1.names <- rep(" ",n.important)
  var1.index <- rep(0,n.important)
  var2.names <- rep(" ",n.important)
  var2.index <- rep(0,n.important)
  int.size <- rep(0,n.important)

  for (i in 1:n.important) {
  
    index.match <- match(i,search.index)

    j <- trunc(index.match/n.preds) + 1
    var1.index[i] <- j
    var1.names[i] <- pred.names[j]

    k <- index.match%%n.preds
    if (k > 0) {   #only do this if k > 0 - otherwise we have all zeros from here on 
      var2.index[i] <- k
      var2.names[i] <- pred.names[k]

      int.size[i] <- cross.tab[k,j]
    }

  }

  rank.list <- data.frame(var1.index,var1.names,var2.index,var2.names,int.size)

  return(list(rank.list = rank.list, interactions = cross.tab, gbm.call = gbm.object$gbm.call))
}


"gbm.plot.fits" <-
function(gbm.object, 
  mask.presence = FALSE, 
  use.factor = FALSE,
  plot.layout = c(3,4)          # define the default layout for graphs on the page
)
{
#
# j leathwick, j elith - 7th January 2005
#
# version 2.0 - developed in R 2.0
#
# to plot distribution of fitted values in relation to ydat from mars or other p/a models
# allows masking out of absences to enable focus on sites with high predicted values
# fitted values = those from model; raw.values = original y values
# label = text species name; ydat = predictor dataset
# mask.presence forces function to only plot fitted values for presences
# use.factor forces to use quicker printing box and whisker plot
# file.name routes to a pdf file of this name
#

    max.plots <- plot.layout[1] * plot.layout[2]
    plot.count <- 0

    dat <- gbm.object$gbm.call$dataframe    #get the dataframe name
    dat <- as.data.frame(eval(parse(text=dat)))   #and now the data

    n.cases <- nrow(dat)

    gbm.call <- gbm.object$gbm.call	#and the mars call details
    gbm.x <- gbm.call$gbm.x    
    gbm.y <- gbm.call$gbm.y
    family <- gbm.call$family

    xdat <- as.data.frame(dat[,gbm.x])
    ydat <- as.data.frame(dat[,gbm.y])

    n.preds <- ncol(xdat)

    fitted.values <- gbm.object$fitted

    pred.names <- names(dat)[gbm.x]
    sp.name <- names(dat)[gbm.y]

    if (mask.presence) {
	  mask <- ydat == 1 }
    else {
	  mask <- rep(TRUE, length = n.cases) }

    robust.max.fit <- approx(ppoints(fitted.values[mask]), sort(fitted.values[mask]), 0.99) #find 99%ile value

    for (j in 1:n.preds) {

     if (plot.count == max.plots) {
       plot.count = 0
     }

     if (plot.count == 0) {
#       windows(width = 11, height = 8)
		opar <- par()
		par(mfrow = plot.layout)
     }

     plot.count <- plot.count + 1

	  if (is.numeric(xdat[mask,j])) {
            wt.mean <- zapsmall(mean((xdat[mask, j] * fitted.values[mask]^5)/mean(fitted.values[mask]^5),na.rm=TRUE),2)
            }
        else {wt.mean <- "na"}
 	  if (use.factor) {
	      temp <- factor(cut(xdat[mask, j], breaks = 12))
		if (family == "binomial") {
		    plot(temp, fitted.values[mask], xlab = pred.names[j], ylab = "fitted values", ylim = c(0, 1))}
		else {
		    plot(temp, fitted.values[mask], xlab = pred.names[j], ylab = "fitted values")}
		}
 	  else {
		if (family == "binomial") {
		    plot(xdat[mask, j], fitted.values[mask], xlab = pred.names[j], ylab = "fitted values", 
					ylim = c(0, 1))}
		    else {
			plot(xdat[mask, j], fitted.values[mask], xlab = pred.names[j], ylab = "fitted values")}
		    }
		abline(h = (0.333 * robust.max.fit$y), lty = 2.)
		if (j == 1) { 
			title(paste(sp.name, ", wtm = ", wt.mean))}
		else {
			title(paste("wtm = ", wt.mean))}
    }
	par(opar)
	rm(opar)
}

"gbm.simplify" <-
function(gbm.object,          # a gbm object describing sample intensity 
  n.folds = 10,               # number of times to repeat the analysis
  n.drops = "auto",           # can be automatic or an integer specifying the number of drops to check
  alpha = 1,                  # controls stopping when n.drops = "auto"
  prev.stratify = TRUE,       # use prevalence stratification in selecting evaluation data
  eval.data = NULL,           # an independent evaluation data set - leave here for now
  plot = TRUE)                # plot results
{
# function to simplify a brt model fitted using gbm.step
# 
# version 2.9 - J. Leathwick/J. Elith - June 2007
#
# starts with an inital cross-validated model as produced by gbm.step
# and then assesses the potential to remove predictors using k-fold cv
# does this for each fold, removing the lowest contributing predictor, 
# and repeating this process for a set number of steps 
# after the removal of each predictor, the change in predictive deviance
# is computed relative to that obtained when using all predictors
# it returns a list containing the mean change in deviance and its se
# as a function of the number of variables removed
# having completed the cross validation, it then identifies the sequence
# of variable to remove when using the full data set, testing this
# up to the number of steps used in the cross-validation phase of the analysis
# with results reported to the screen - it then returns 
# a table containing the order in which variables are to be removed
# and a list of vectors, each of which specifies the predictor col numbers
# in the original dataframe  - the latter can be used as an argument to gbm.step
# e.g., gbm.step(data = data, gbm.x = simplify.object$pred.list[[4]]...
# would implement a new analysis with the original predictor set, minus its
# four lowest contributing predictors
#

require(gbm)

# first get the original analysis details..

  gbm.call <- gbm.object$gbm.call
  data <- eval(parse(text=gbm.call$dataframe))
  n.cases <- nrow(data)
  gbm.x <- gbm.call$gbm.x
  gbm.y <- gbm.call$gbm.y
  family <- gbm.call$family
  lr <- gbm.call$learning.rate
  tc <- gbm.call$tree.complexity
  start.preds <- length(gbm.x)
  max.drops <- start.preds - 2
  response.name <- gbm.call$response.name
  predictor.names <- gbm.call$predictor.names
  n.trees <- gbm.call$best.trees
  pred.list <- list(initial = gbm.x)
  weights <- gbm.object$weights

  if (n.drops == "auto") auto.stop <- TRUE
  
  else auto.stop <- FALSE

# take a copy of the original data and starting predictors

  orig.data <- data
  orig.gbm.x <- gbm.x

#  if (!is.null(eval.data)) independent.test <- TRUE
#    else independent.test <- FALSE

# extract original performance statistics...

  original.deviance <- round(gbm.object$cv.statistics$deviance.mean,4)
  original.deviance.se <- round(gbm.object$cv.statistics$deviance.se,4)

  cat("gbm.simplify - version 2.9","\n\n")
  cat("simplifying gbm.step model for ",response.name," with ",start.preds," predictors",sep="")
  cat(" and ",n.cases," observations \n",sep="")
  cat("original deviance = ",original.deviance,"(",original.deviance.se,")\n\n",sep="")

# check that n.drops is less than n.preds - 2 and update if required

  if (auto.stop) {
    cat("variable removal will proceed until average change exceeds the original se\n\n")
    n.drops <- 1 }
  else{
    if (n.drops > start.preds - 2) {
      cat("value of n.drops (",n.drops,") is greater than permitted","\n",
        "resetting value to ",start.preds - 2,"\n\n",sep="")
      n.drops <- start.preds - 2
    }
    else {
      cat("a fixed number of",n.drops,"drops will be tested\n\n")
    }
  }

# set up storage for results

  dev.results <- matrix(0, nrow = n.drops, ncol = n.folds)
  dimnames(dev.results) <- list(paste("drop.",1:n.drops,sep=""),
   paste("rep.",1:n.folds,sep=""))

  drop.count <- matrix(NA, nrow = start.preds, ncol = n.folds)
  dimnames(drop.count) <- list(predictor.names,paste("rep.",1:n.folds,sep=""))

  original.deviances <- rep(0,n.folds)

  model.list <- list(paste("model",c(1:n.folds),sep=""))     # dummy list for the tree models

# create gbm.fixed function call

  gbm.call.string <- paste("try(gbm.fixed(data=train.data,gbm.x=gbm.new.x,gbm.y=gbm.y,",sep="")
  gbm.call.string <- paste(gbm.call.string,"family=family,learning.rate=lr,tree.complexity=tc,",sep="")
  gbm.call.string <- paste(gbm.call.string,"n.trees = ",n.trees,", site.weights = weights.subset,verbose=FALSE))",sep="")
  
# now set up the fold structure

  if (prev.stratify & family == "bernoulli") {
    presence.mask <- data[,gbm.y] == 1
    absence.mask <- data[,gbm.y] == 0
    n.pres <- sum(presence.mask)
    n.abs <- sum(absence.mask)

# create a vector of randomised numbers and feed into presences
    selector <- rep(0,n.cases)
    temp <- rep(seq(1, n.folds, by = 1), length = n.pres)
    temp <- temp[order(runif(n.pres, 1, 100))]
    selector[presence.mask] <- temp

# and then do the same for absences
    temp <- rep(seq(1, n.folds, by = 1), length = n.abs)
    temp <- temp[order(runif(n.abs, 1, 100))]
    selector[absence.mask] <- temp
    }

  else {  #otherwise make them random with respect to presence/absence
    selector <- rep(seq(1, n.folds, by = 1), length = n.cases)
    selector <- selector[order(runif(n.cases, 1, 100))]
    }

# now start by creating the intial models for each fold

  cat("creating initial models...\n\n")

  gbm.new.x <- orig.gbm.x

  for (i in 1:n.folds) {        
  
# create the training and prediction folds

    train.data <- orig.data[selector!=i,]
    weights.subset <- weights[selector != i]
    eval.data <- orig.data[selector==i,]

    model.list[[i]] <- eval(parse(text=gbm.call.string))  # create a fixed size object

# now make predictions to the withheld fold

    u_i <- eval.data[,gbm.y]
    y_i <- predict.gbm(model.list[[i]], eval.data, n.trees, "response")

    original.deviances[i] <- round(calc.deviance(u_i,y_i, family = family, calc.mean = TRUE),4)
 
  } # end of creating initial models

  n.steps <- 1
 
  while (n.steps <= n.drops & n.steps <= max.drops) {

    cat("dropping predictor",n.steps,"\n")

    for (i in 1:n.folds) {

# get the right data

    train.data <- orig.data[selector!=i,]
    eval.data <- orig.data[selector==i,]
    weights.subset <- weights[selector != i]

# get the current model details

    gbm.x <- model.list[[i]]$gbm.call$gbm.x
    n.preds <- length(gbm.x)
    these.pred.names <- model.list[[i]]$gbm.call$predictor.names
    contributions <- model.list[[i]]$contributions

# get the index number in pred.names of the last variable in the contribution table

    last.variable <- match(as.character(contributions[n.preds,1]),these.pred.names)
    gbm.new.x <- gbm.x[-last.variable]

# and keep a record of what has been dropped

    last.variable <- match(as.character(contributions[n.preds,1]),predictor.names)
    drop.count[last.variable,i] <- n.steps

    model.list[[i]] <- eval(parse(text=gbm.call.string))  # create a fixed size object

    u_i <- eval.data[,gbm.y]
    y_i <- predict.gbm(model.list[[i]],eval.data,n.trees,"response")

    deviance <- round(calc.deviance(u_i,y_i, family = family, calc.mean = TRUE),4)
    
# calculate difference between intial and new model by subtracting new from old because we want to minimise deviance

    dev.results[n.steps,i] <- round(deviance - original.deviances[i] ,4)

    }

  if (auto.stop){ # check to see if delta mean is less than original deviance error estimate

    delta.mean <- mean(dev.results[n.steps,])

    if (delta.mean < (alpha * original.deviance.se)) {
      n.drops <- n.drops + 1
      dev.results <- rbind(dev.results, rep(0,n.folds))
      }
    }   
  n.steps <- n.steps + 1
  }

# now label the deviance matrix

  dimnames(dev.results) <- list(paste("drop.",1:n.drops,sep=""),
   paste("rep.",1:n.folds,sep=""))

# calculate mean changes in deviance and their se

  mean.delta <- apply(dev.results,1,mean)
  se.delta <- sqrt(apply(dev.results,1,var))/sqrt(n.folds)

###########################

  if (plot) {
    y.max <- 1.5 * max(mean.delta + se.delta)
    y.min <- 1.5 * min(mean.delta - se.delta)
    plot(seq(0,n.drops),c(0,mean.delta),xlab="variables removed",
      ylab = "change in predictive deviance",type='l',ylim=c(y.min,y.max))
    lines(seq(0,n.drops),c(0,mean.delta) + c(0,se.delta),lty = 2)
    lines(seq(0,n.drops),c(0,mean.delta) - c(0,se.delta),lty = 2)
    abline(h = 0 , lty = 2, col = 3)
    min.y <- min(c(0,mean.delta))
    min.pos <- match(min.y,c(0,mean.delta)) - 1 # subtract one because now zero base
    abline(v = min.pos, lty = 3, col = 2)
    abline(h = original.deviance.se, lty = 2, col = 2)
    title(paste("RFE deviance - ",response.name," - folds = ",n.folds,sep=""))
  }

# and do a final backwards drop sequence from the original model

  cat("\nnow processing final dropping of variables with full data \n\n")

  gbm.call.string <- paste("try(gbm.fixed(data=orig.data,gbm.x=gbm.new.x,gbm.y=gbm.y,",sep="")
  gbm.call.string <- paste(gbm.call.string,"family=family,learning.rate=lr,tree.complexity=tc,",sep="")
  gbm.call.string <- paste(gbm.call.string,"n.trees = ",n.trees,", site.weights = weights,verbose=FALSE))",sep="")

  n.steps <- n.steps - 1 #decrement by one to reverse last increment in prev loop

  final.model <- gbm.object  # restore the original model and data
  train.data <- orig.data

# and set up storage

  final.drops <- matrix(NA, nrow = start.preds, ncol = 1)
  dimnames(final.drops) <- list(predictor.names,"step")

  for (i in 1:n.steps) {

# get the current model details

    gbm.x <- final.model$gbm.call$gbm.x
    n.preds <- length(gbm.x)
    these.pred.names <- final.model$gbm.call$predictor.names
    contributions <- final.model$contributions

    cat(i,"-",as.character(contributions[n.preds,1]),"\n")

# get the index number in pred.names of the last variable in the contribution table

    last.variable <- match(as.character(contributions[n.preds,1]),these.pred.names)
    gbm.new.x <- gbm.x[-last.variable]

# and keep a record of what has been dropped

    last.variable <- match(as.character(contributions[n.preds,1]),predictor.names)
    final.drops[last.variable] <- i

    final.model <- eval(parse(text=gbm.call.string))  # create a fixed size object

  }

#and then the corresponding numbers

  removal.list <- dimnames(final.drops)[[1]]
  removal.list <- removal.list[order(final.drops)]
  removal.list <- removal.list[1:n.drops]

  removal.numbers <- rep(0,n.steps)

# construct predictor lists to faciliate final model fitting

  for (i in 1:n.steps) {
    removal.numbers[i] <- match(removal.list[i],predictor.names)
    pred.list[[i]] <- orig.gbm.x[0-removal.numbers[1:i]]
    names(pred.list)[i] <- paste("preds.",i,sep="")
  }

  deviance.summary <- data.frame(mean = round(mean.delta,4), se = round(se.delta,4))

  final.drops <- data.frame("preds" = dimnames(final.drops)[[1]][order(final.drops)],
     "order" = final.drops[order(final.drops)])

  return(list(deviance.summary = deviance.summary, 
    deviance.matrix = dev.results, drop.count = drop.count, 
    final.drops = final.drops, pred.list = pred.list, 
    gbm.call = gbm.call))

}

"gbm.predict.grids" <-
function(model, new.dat, want.grids = F, preds2R = T, sp.name = "preds",pred.vec = NULL, filepath = NULL, 

num.col = NULL, num.row = NULL, xll = NULL, yll = NULL, cell.size = NULL, no.data = NULL, plot=F, full.grid=T, part.number=NULL, part.row = NULL, header = T) 
{
# J.Elith / J.Leathwick, March 07
# to make predictions to sites or grids. If to sites, the 
# predictions are written to the R workspace. If to grid, 
# the grids are written to a nominated directory and optionally also
# plotted in R
# 
# new data (new.dat) must be a data frame with column names identical 
# to names for all variables in the model used for prediction
#
# pred.vec is a vector of -9999's, the length of the scanned full grid
# (i.e. without nodata values excluded)
#
# filepath must specify the whole path as a character vector,but without the final file 
# name - eg "c:/gbm/"

temp <- predict.gbm(model, new.dat, n.trees=model$gbm.call$best.trees, type="response")

if(want.grids)
{
newname <- paste(filepath, sp.name,".asc", sep="")
full.pred <- pred.vec
full.pred[as.numeric(row.names(new.dat))] <- temp
if(header){
write(paste("ncols          ",num.col,sep=""),newname)
write(paste("nrows          ",num.row,sep=""),newname,append=T)
write(paste("xllcorner      ",xll,sep=""),newname,append=T)
write(paste("yllcorner      ",yll,sep=""),newname,append=T)
write(paste("cellsize       ",cell.size,sep=""),newname,append=T)
write(paste("NODATA_value ",no.data,sep=""),newname,append=T)
}

	if(full.grid){
         full.pred.mat <- matrix(full.pred, nrow=num.row, ncol=num.col, byrow=T)
	if (plot) 
	{
	image(z = t(full.pred.mat)[, nrow(full.pred.mat):1], zlim =  c(0,1), col = rev(topo.colors(12)))
	}


	write.table(full.pred.mat, newname, sep=" ", append=T, row.names=F, col.names=F)

	#also write to R directory, if required:

	if(preds2R){assign(sp.name,temp, pos=1)}
	}

	else{
         full.pred.mat <- matrix(full.pred, nrow=part.row, ncol=num.col, byrow=T)
	write.table(full.pred.mat, newname, sep=" ", append=T, row.names=F, col.names=F)
	if(preds2R){assign(paste(sp.name, part.number, sep=""),temp, pos=1)}
	}
	
}

else{
assign(sp.name,temp, pos=1)
}

}