#lag two datasets, so as to maximise their correlations maxcor <- function( ref, obs, refDATA=ref, obsDATA=obs, obsVARS=NULL, maxlag=100, hardlag=F, absolute=F) { #ref: vector with variable in refernce time frame #obs: vector with variable in time frame to be adjusted #refDATA: all data that carries time frame of ref #obsDATA: all data that carries time frame of obs #obsVARS: specify if only several columns in obsDATA shall be lagged #------------------ #high pass filter #only process the data with good quality: missing data <= 10% MissingV=length(which(is.na(obs)))/length(obs)*100 if (MissingV<=10) { # #intepolate data first # # MeanV=mean(obs,na.rm=TRUE) # # test <- obs # if(test[1] == "NAN"){ # test[1] <- MeanV # } # # if(test[length(test)] == "NAN"){ # test[length(test)] <- MeanV # } # # #length(which(is.na(test))) # # test <- approx(x=DOY, y=test, xout=DOY)$y # # test <- na.approx(test) #use approx:repeat the adjacent value #freqz(test) plot(test, type="l") #actual high pass filting #filter frequencies #import from top function freq <- 10 #nyquist frequency NY <- freq / 2 #cutoff frequency cutoff <- 1 / (maxlag / freq) bf1 <- butter(n=4, W=cutoff/NY, type="high") obsDATA <- filtfilt(bf1, test) obs <- obsDATA #freqz(obsDATA) lines(obsDATA, col=4) lines(obsDATA + MeanV, col=2) #assign dewpoint reference and EIDAS observation if(is.null(obsVARS)) { refDATA <- as.matrix(refDATA) obsDATA <- as.matrix(obsDATA) } else { refcol <- ncol(refDATA) refDATA <- cbind(refDATA, obsDATA) obsDATA <- as.matrix(obsDATA[,obsVARS]) } #test for correct lag time #for hard maxlag argument if(hardlag == T) { lagt <- ccf(ref, obs, lag.max = maxlag, plot = F, na.action = na.pass) lag <- ifelse(absolute == F, lagt$lag[which(lagt$acf == max(lagt$acf))], #(+): obs lags behind lagt$lag[which(abs(lagt$acf) == max(abs(lagt$acf)))] ) if(lag == maxlag) lag <- 0 #for soft maxlag argument } else { lag <- maxlag count <- 1 while(abs(lag) == maxlag) { if(count > 1) maxlag <- 2 * maxlag lagt <- ccf(ref, obs, lag.max = maxlag, plot = F, na.action = na.pass) lag <- ifelse(absolute == F, lagt$lag[which(lagt$acf == max(lagt$acf))], #(+): obs lags behind lagt$lag[which(abs(lagt$acf) == max(abs(lagt$acf)))] ) count <- count + 1 } } #adjust entire refDATA time series to EIDAS time (assuming constant timing offset over all variables) #ref <- lag(ref, k=lag*freq) if(lag < 0) { refDATA <- refDATA[1:(nrow(refDATA) + lag),] obsDATA <- obsDATA[(1 - lag):(nrow(obsDATA)),] } if(lag > 0) { #lag <- lag + 1 #necessary to achieve correct displacement (nested test with ccf) refDATA <- refDATA[(1 + lag):(nrow(refDATA)),] obsDATA <- obsDATA[1:(nrow(obsDATA) - lag),] } # nrow(refDATA) #50174 #if only certain variables within obsDATA shall be lagged: if(is.null(obsVARS)) { refOUT <- as.matrix(refDATA) obsOUT <- as.matrix(obsDATA) } else { refOUT <- as.matrix(refDATA[,1:refcol]) obsOUT <- as.matrix(refDATA[,(refcol+1):ncol(refDATA)]) obsOUT[,obsVARS] <- obsDATA } #prepare output if(ncol(refOUT) == 1) refOUT <- refOUT[,1] if(ncol(obsOUT) == 1) obsOUT <- obsOUT[,1] output <- list( refDATA=refOUT, obsDATA=obsOUT, lag=lag, ccf=max(lagt$acf) ) return(output) } if(MissingV>10) { ##if data is in poor quality, we don't process it. return (output <- list(refDATA, obsDATA, lag="NAN", ccf="NAN")) } }