#lag two datasets, so as to maximise their correlations maxcor <- function( ref, obs, refDATA=ref, obsDATA=obs, obsVARS=NULL, maxlag=100, hardlag=F, absolute=F) { #ref: vector with variable in refernce time frame #obs: vector with variable in time frame to be adjusted #refDATA: all data that carries time frame of ref #obsDATA: all data that carries time frame of obs #obsVARS: specify if only several columns in obsDATA shall be lagged #------------------ #high pass filter #only process the data with good quality: missing data <= 10% MissingV <- length(which(is.na(obs)))/length(obs)*100 if (MissingV<=10) { #intepolate data first MeanV=mean(obs,na.rm=TRUE) test <- obs if(test[1] == "NAN"){ test[1] <- MeanV } if(test[length(test)] == "NAN"){ test[length(test)] <- MeanV } #length(which(is.na(test))) # test <- approx(x=DOY, y=test, xout=DOY)$y test <- na.approx(test) #use approx:repeat the adjacent value #test <- approx(x=refDATA$DOY,y=obs,xout=refDATA$DOY, method="linear")$y MissingV1 <- length(which(is.na(test)))/length(test)*100 obs <- test #freqz(test) #plot(test, type="l") # #actual high pass filting # #filter frequencies # #import from top function # freq <- 10 # #nyquist frequency # NY <- freq / 2 # #cutoff frequency # cutoff <- 1 / (2* maxlag / freq) # # bf1 <- butter(n=4, W=cutoff/NY, type="high") # # #discard three filter lengths from start and end # not <- -c(1:(NY/cutoff*6), (length(test)-(NY/cutoff*6)):length(test)) # # obs <- filtfilt(bf1, test)[not] # ref <- ref[not] # # # #freqz(obsDATA) # #lines(obs, col=7) # MeanV=mean(test,na.rm=TRUE) # lines(obs + MeanV, col=7) # # #assign dewpoint reference and EIDAS observation if(is.null(obsVARS)) { refDATA <- as.matrix(refDATA) obsDATA <- as.matrix(obsDATA) } else { refcol <- ncol(refDATA) refDATA <- cbind(refDATA, obsDATA) obsDATA <- as.matrix(obsDATA[,obsVARS]) } #test for correct lag time #for hard maxlag argument if(hardlag == T) { lagt <- ccf(ref, obs, lag.max = maxlag, plot = F, na.action = na.pass) lag <- ifelse(absolute == F, lagt$lag[which(lagt$acf == max(lagt$acf))], #(+): obs lags behind lagt$lag[which(abs(lagt$acf) == max(abs(lagt$acf)))] ) if(lag == maxlag) lag <- 0 #for soft maxlag argument } else { lag <- maxlag count <- 1 while(abs(lag) == maxlag) { if(count > 1) maxlag <- 2 * maxlag lagt <- ccf(ref, obs, lag.max = maxlag, plot = F, na.action = na.pass) lag <- ifelse(absolute == F, lagt$lag[which(lagt$acf == max(lagt$acf))], #(+): obs lags behind lagt$lag[which(abs(lagt$acf) == max(abs(lagt$acf)))] ) count <- count + 1 } } #adjust entire refDATA time series to EIDAS time (assuming constant timing offset over all variables) #ref <- lag(ref, k=lag*freq) if(lag < 0) { refDATA <- refDATA[1:(nrow(refDATA) + lag),] obsDATA <- obsDATA[(1 - lag):(nrow(obsDATA)),] } if(lag > 0) { #lag <- lag + 1 #necessary to achieve correct displacement (nested test with ccf) refDATA <- refDATA[(1 + lag):(nrow(refDATA)),] obsDATA <- obsDATA[1:(nrow(obsDATA) - lag),] } # nrow(refDATA) #50174 #if only certain variables within obsDATA shall be lagged: if(is.null(obsVARS)) { refOUT <- as.matrix(refDATA) obsOUT <- as.matrix(obsDATA) } else { refOUT <- as.matrix(refDATA[,1:refcol]) obsOUT <- as.matrix(refDATA[,(refcol+1):ncol(refDATA)]) obsOUT[,obsVARS] <- obsDATA } #prepare output if(ncol(refOUT) == 1) refOUT <- refOUT[,1] if(ncol(obsOUT) == 1) obsOUT <- obsOUT[,1] output <- list( refDATA=refOUT, obsDATA=obsOUT, lag=lag, ccf=max(lagt$acf) ) return(output) } if(MissingV>10) { ##if data is in poor quality, we don't process it. return (output <- list(refDATA, obsDATA, lag="NAN", ccf="NAN")) } }