"""Compute the ensemble-mean ``w`` field and write it to a netCDF file.

For every ensemble member, the ``DATA_3D_AV_NETCDF_N02slice`` file found in
each sub-folder of that member's directory is opened lazily, reduced to the
``w`` variable, and combined into one dataset.  The per-member datasets are
then concatenated along a new ``ensemble`` dimension, averaged over it, and
written to ``data/out/w_hi_IOP02.nc``.
"""
import numpy as np
import xarray as xr
import matplotlib as mpl
#mpl.use('Agg') # Must be before importing matplotlib.pyplot or pylab!
import matplotlib.pyplot as plt
import pandas as pd
import os
import datetime
#import seaborn as sns
import timeit
import random

# Read the required data from the member folders and combine them into one
# dataset, for w for now.
ensemble_numbers = np.arange(1, 9)  # members 1..8
w_list = []

# Loop-invariant inputs/outputs, hoisted out of the member loop.
source_root = '/bog/incoming/CHEESEHEAD/palm/realistic_runs/ches_IOP2'
slice_file_name = 'DATA_3D_AV_NETCDF_N02slice'
output_path = 'data/out/w_hi_IOP02.nc'


def subset_process_time(ds):
    """Reduce one opened file to ``w`` with time stamps rounded to the minute.

    Used as the ``preprocess`` hook of ``xr.open_mfdataset``, so it is applied
    to each file's dataset before the files are combined.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset of a single input file; must contain a ``w`` variable and a
        datetime-like ``time`` coordinate.

    Returns
    -------
    xarray.Dataset
        A dataset holding only ``w`` (and its coordinates), with ``time``
        rounded to the nearest minute.
    """
    # Subset for the variable(s).  Double brackets keep a Dataset, which is
    # what the later ``['w']`` lookup on the combined result relies on.
    ds = ds[['w']]
    # 'min' is the minute frequency alias; the older 'T' spelling is
    # deprecated in recent pandas.  assign_coords returns a new object rather
    # than mutating the dataset handed in by open_mfdataset.
    return ds.assign_coords(time=ds.time.dt.round('min'))


try:
    # Loop through ensemble members.
    for member_num in ensemble_numbers:
        start_time = timeit.default_timer()

        source_folder = os.path.join(
            source_root, 'ensemble.member.' + str(member_num))

        # Make a folder list and build the path of the slice file expected
        # inside every one of those folders.
        folder_list = sorted(os.listdir(source_folder))
        file_list = sorted(
            os.path.join(source_folder, folder, slice_file_name)
            for folder in folder_list
        )
        print(folder_list)

        # Combine all the files of this member into one lazily-loaded
        # dataset.  parallel=True lets xarray open/preprocess the files in
        # parallel via dask; the preprocess hook keeps only ``w``.
        # NOTE(review): the original comments expect the file order to be
        # inferred from coordinate values; that depends on the default
        # ``combine`` mode of the installed xarray -- confirm, or pass
        # combine='by_coords' explicitly.
        ds = xr.open_mfdataset(
            file_list,
            parallel=True,
            preprocess=subset_process_time,
            chunks={"x": 50, "y": 50, "zw_3d": 50},
        )
        # Keep the dataset open: its data is only read when the ensemble
        # mean is written below.  It is closed in the ``finally`` clause.
        w_list.append(ds)
        print('Member',str(member_num),'finished collecting data')

        elapsed = timeit.default_timer() - start_time
        print('Done with member',str(member_num),' Time elapsed ',elapsed, 'seconds')

    print(w_list)

    # Stack the members along a new 'ensemble' dimension and average over it.
    w_ds = xr.concat(w_list, 'ensemble')
    w_data = w_ds.mean('ensemble')['w']

    start_time = timeit.default_timer()
    # Make sure the target directory exists before the (expensive) write.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    w_data.to_netcdf(output_path)
    elapsed = timeit.default_timer() - start_time
    print('Finished writing w_hi',' Time elapsed ',elapsed, 'seconds')
finally:
    # Release the input file handles once the output has been written (or if
    # anything above failed part-way through).
    for opened_ds in w_list:
        opened_ds.close()