##CONFIDENCE INTERVAL FOR LINEAR REGRESSION #Assume that the error term ?? in the linear regression model is independent of x, and is normally distributed, with zero mean (and constant variance -> weights introduced). For a given value of x, the interval estimate for the mean of the dependent variable, y , is called the confidence interval. #http://www.r-tutor.com/content/confidence-interval-linear-regression #http://www.vias.org/tmdatanaleng/cc_regress_confidiv.html #http://www.weibull.com/DOEWeb/confidence_intervals_in_simple_linear_regression.htm #http://people.stfx.ca/bliengme/ExcelTips/RegressionAnalysisConfidence3.htm ##intercept and slope error always correct; ##confidence intervals only correct when intercept present conf_int <- function( ide, #indipendent variable ide_new=ide, #new sequence of independent variable (X) covering the desired range of confidence interval dep, #dependend variable (Y) fit, #fitted values (Y) coef_mn, #regression coefficients coef_se=NULL, #optional standard error of regression coefficients SSR=NULL, #sum of squared residuals in dependent variable (SSE in EIV regression) weights=1, #optional vector of weights for weighted regression lev_t=0.95 #level of (two-sided) confidence interval ){ #data preparation #same NAs in independent and dependent variables (no NAs returned in fitted values) ide[which(is.na(dep))] <- NA dep[which(is.na(ide))] <- NA #omit NAs ide <- na.omit(ide) dep <- na.omit(dep) #error message if dimensions not fitting if(length(coef_mn) %in% c(1,2) == F) return(print("wrong number of regression coefficients supplied")) if(length(ide) != length(dep)) return(print("length of independent / dependent variables do not match")) if(length(fit) != length(dep)) return(print("length of dependent / fitted variables do not match")) #statistics preparation ss <- length(fit) #sample size df <- 1 + (length(coef_mn) - 1) #degrees of freedom (http://www.ats.ucla.edu/stat/sas/output/reg.htm) lev_o <- 1 - (1 - lev_t) / 2 #one sided probability of t-distribution tval <- qt(lev_o, ss - df) #one sided t value #statistics #mean of independent variable if(length(coef_mn) == 1) ide_m <- 0 if(length(coef_mn) == 2) ide_m <- mean(ide) #sum of squared deviations in independent variable SSD <- sum((ide - ide_m)^2) #sum of squared residuals in dependent variable if(is.null(SSR)) SSR <- sum(weights * (fit - dep)^2) #mean squared deviation of dependent variable and fit MSD <- SSR / (ss - df) #assign result list result <- list() #standard error in regression coefficients #standard error in slope (http://www.physicsforums.com/showthread.php?t=194616) result$rcse <- sqrt(1 / (ss - df) * SSR / SSD) #standard error in intercept (http://talkstats.com/showthread.php?t=5056) if(length(coef_mn) == 2) result$rcse <- c(sqrt(MSD * sum(ide^2) / (ss * SSD)), result$rcse) #use if no standard errors have been submitted if(is.null(coef_se)) coef_se <- result$rcse #confidence intervals #of the regression coefficients result$rcco <- tval * coef_se #of the population mean -> predict(...,interval = c("confidence")) if(length(coef_mn) == 1) result$deco <- result$rcco * ide_new if(length(coef_mn) == 2) result$deco <- tval * sqrt(MSD * (1 / ss + (ide_new - ide_m)^2 / SSD)) #of future values -> predict(...,interval = c("prediction")) if(length(coef_mn) == 1) DEPR <- NULL if(length(coef_mn) == 2) DEPR <- tval * sqrt(MSD * (1 + 1 / ss + (ide_new - ide_m)^2 / SSD)) #aggregate and return results #regression coefficients result$rcse <-data.frame(matrix(result$rcse, ncol=length(coef_mn))) if(length(coef_mn) == 1) dimnames(result$rcse)[[2]] <- "slo" if(length(coef_mn) == 2) dimnames(result$rcse)[[2]] <- c("off", "slo") result$rcco <-data.frame(matrix(result$rcco, ncol=length(coef_mn))) dimnames(result$rcco)[[2]] <- dimnames(result$rcse)[[2]] #create new fitted values # if(length(ide_new) != length(fit)) { if(length(coef_mn) == 1) fit <- polyval(c(coef_mn, 0), ide_new) if(length(coef_mn) == 2) fit <- polyval(rev(coef_mn), ide_new) # } #aggregate results result$deco <- data.frame( X=ide_new, Y=fit, co_n=fit - result$deco, co_p=fit + result$deco ) if(!is.null(DEPR)) { result$depr <- data.frame( X=ide_new, Y=fit, pr_n=fit - DEPR, pr_p=fit + DEPR ) } #return results return(result) }