import matplotlib; matplotlib.use('agg')

import numpy as np
import datetime as dt
import cftime
from scipy.signal import convolve2d
from scipy import ndimage
import matplotlib.pylab as plt
from netCDF4 import Dataset
import glob
import networkx as nx
import sys

###################################################################
#####################  General Use Functions  #####################
###################################################################

def str2cftime(date_string, fmt, calendar):
    """
    Example usage:
    cftime_datetime = str2cftime('2021010100', '%Y%m%d%H', 'standard')

    Converts string time (date_string) in a strftime format (fmt)
    to a cftime datetime, using the calendar specified.

    Unfortunately, cftime does not have a strptime function.
    This function uses datetime's strptime function as an intermediate step.
    """
    this_dt = dt.datetime.strptime(date_string, fmt)
    return cftime.datetime(this_dt.year, this_dt.month, this_dt.day,
        this_dt.hour, this_dt.minute, this_dt.second, calendar=calendar)


def dtrange(begin_datetime, ending_datetime, interval_hours):
    """
    Example usage:
    dt_list = dtrange(cftime.datetime(2021,1,1,0), cftime.datetime(2021,2,1,0), 3)
    --> 3-hourly times from 00Z 2021-1-1 to 21Z 2021-1-31 (excludes ending_datetime)

    Analogous to range and Numpy's arange function, but for datetime objects.
    Returns a list of datetime objects.
    Works for both datetime.datetime and cftime.datetime.
    """
    total_sec = (ending_datetime - begin_datetime).total_seconds()
    return [begin_datetime + dt.timedelta(seconds=int(x))
        for x in np.arange(0, total_sec, 3600*interval_hours)]


###################################################################
######################  LP Object Functions  ######################
###################################################################

def calc_scaled_average(data_in_accumulation_period, factor):
    """
    scaled_average = calc_scaled_average(data_in_accumulation_period, factor)

    Calculate the mean over the accumulation period and multiply it by the
    scaling factor.
    -- data_in_accumulation_period[t,y,x] is a 3D array.
    -- factor gets multiplied by the mean. E.g., if the data is rain rate in
       mm/h, using a factor of 24 would give mm/day.
    """
    return factor * np.nanmean(data_in_accumulation_period, axis=0)


def identify_lp_objects(field, threshold, min_points=1,
        object_is_gt_threshold=True, verbose=False):
    """
    label_im = identify_lp_objects(field, threshold, min_points=1,
                                   object_is_gt_threshold=True, verbose=False)

    Given an input data field (e.g., already accumulated and filtered),
    identify the LP Objects in that field. Return an array the same size
    as field, but with values indexed by object IDs.
    """

    field_bw = 0 * field
    if object_is_gt_threshold:
        field_bw[(field > threshold)] = 1
    else:
        field_bw[(field < threshold)] = 1

    label_im, nb_labels = ndimage.label(field_bw)
    if verbose:
        print('Found '+str(nb_labels)+' objects.', flush=True) # how many regions?

    label_points = ndimage.sum(1, label_im, range(nb_labels+1))

    throw_away = [x for x in range(1, nb_labels+1) if label_points[x] < min_points]
    if len(throw_away) > 0:
        if verbose:
            if len(throw_away) == 1:
                print('Discarding ' + str(len(throw_away))
                    + ' feature that was < ' + str(min_points) + ' points.', flush=True)
            else:
                print('Discarding ' + str(len(throw_away))
                    + ' features that were < ' + str(min_points) + ' points.', flush=True)

        for nn in throw_away:
            label_im[label_im == nn] = 0

        ## Re-order LP Object IDs.
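        ## (Illustrative note, not from the original code: if the surviving
        ## labels were [0, 2, 7], the lines below relabel them to [0, 1, 2],
        ## so object IDs stay consecutive after small features are discarded.)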
        label_im_old = label_im.copy()
        id_list = sorted(np.unique(label_im_old))
        for nn in range(len(id_list)):
            label_im[label_im_old == id_list[nn]] = nn

    return label_im


def calc_grid_cell_area(lon, lat):
    """
    area = calc_grid_cell_area(lon, lat)

    Given lon and lat arrays, calculate the area of each grid cell.
    - lon and lat don't need to be a uniform grid, but they need to be
      increasing in both the x and y direction for this function to work.
    - lon and lat must already be 2-D arrays. If you have 1-D coordinate
      arrays, pass them through np.meshgrid first (as is done in
      calculate_lp_object_properties below).
    """
    area = None
    if lon.ndim == 1:
        print('ERROR: lon and lat must be 2D arrays for function calc_grid_cell_area.', flush=True)
    else:
        ny, nx = lon.shape
        dlon = 0.0*lon
        dlat = 0.0*lat
        dlon[:,1:nx-1] = abs(0.5*(lon[:,1:nx-1] + lon[:,2:nx])
                           - 0.5*(lon[:,0:nx-2] + lon[:,1:nx-1]))
        dlon[:,0] = dlon[:,1]
        dlon[:,nx-1] = dlon[:,nx-2]
        dlat[1:ny-1,:] = abs(0.5*(lat[1:ny-1,:] + lat[2:ny,:])
                           - 0.5*(lat[0:ny-2,:] + lat[1:ny-1,:]))
        dlat[0,:] = dlat[1,:]
        dlat[ny-1,:] = dlat[ny-2,:]

        area = (dlat*111.195) * (dlon*111.195*np.cos(np.pi*lat/180.0))

    return area


def calculate_lp_object_properties(lon, lat, field, field_running, field_filtered, label_im,
        object_minimum_gridpoints, end_of_accumulation_time, verbose=False):

    nb_labels = np.max(label_im)
    mask = 1*label_im
    mask[label_im > 0] = 1

    ## If lon and lat not in 2d arrays, put them through np.meshgrid.
    if lon.ndim == 1:
        if verbose:
            print('Detected 1-D lat/lon. Using np.meshgrid to get 2d lat/lon.', flush=True)
        lon2, lat2 = np.meshgrid(lon, lat)
    else:
        lon2 = lon
        lat2 = lat

    X2, Y2 = np.meshgrid(np.arange(lon2.shape[1]), np.arange(lon2.shape[0]))

    area2d = calc_grid_cell_area(lon2, lat2)

    sizes = ndimage.sum(mask, label_im, range(1, nb_labels + 1))

    amean_instantaneous_field = ndimage.mean(field, label_im, range(1, nb_labels + 1))
    amean_running_field = ndimage.mean(field_running, label_im, range(1, nb_labels + 1))
    amean_filtered_running_field = ndimage.mean(field_filtered, label_im, range(1, nb_labels + 1))
    min_instantaneous_field = ndimage.minimum(field, label_im, range(1, nb_labels + 1))
    min_running_field = ndimage.minimum(field_running, label_im, range(1, nb_labels + 1))
    min_filtered_running_field = ndimage.minimum(field_filtered, label_im, range(1, nb_labels + 1))
    max_instantaneous_field = ndimage.maximum(field, label_im, range(1, nb_labels + 1))
    max_running_field = ndimage.maximum(field_running, label_im, range(1, nb_labels + 1))
    max_filtered_running_field = ndimage.maximum(field_filtered, label_im, range(1, nb_labels + 1))

    centroid_lon = ndimage.mean(lon2, label_im, range(1, nb_labels + 1))
    centroid_lat = ndimage.mean(lat2, label_im, range(1, nb_labels + 1))
    centroid_x = ndimage.mean(X2, label_im, range(1, nb_labels + 1))
    centroid_y = ndimage.mean(Y2, label_im, range(1, nb_labels + 1))
    area = ndimage.sum(area2d, label_im, range(1, nb_labels + 1))

    max_lon = ndimage.maximum(lon2, label_im, range(1, nb_labels + 1))
    min_lon = ndimage.minimum(lon2, label_im, range(1, nb_labels + 1))
    max_lat = ndimage.maximum(lat2, label_im, range(1, nb_labels + 1))
    min_lat = ndimage.minimum(lat2, label_im, range(1, nb_labels + 1))

    ## Assign LPT IDs. Order is by longitude. Use zero-base indexing.
    id0 = (1e10 * end_of_accumulation_time.year
           + 1e8 * end_of_accumulation_time.month
           + 1e6 * end_of_accumulation_time.day
           + 1e4 * end_of_accumulation_time.hour)
    id = id0 + np.arange(len(centroid_lon))

    ## Prepare output dict.
    OBJ = {}
    OBJ['id'] = id
    OBJ['label_im'] = label_im
    OBJ['lon'] = centroid_lon
    OBJ['lat'] = centroid_lat
    OBJ['min_lon'] = min_lon
    OBJ['min_lat'] = min_lat
    OBJ['max_lon'] = max_lon
    OBJ['max_lat'] = max_lat
    OBJ['x'] = centroid_x
    OBJ['y'] = centroid_y
    OBJ['n_points'] = sizes
    OBJ['area'] = area

    OBJ['amean_inst_field'] = amean_instantaneous_field
    OBJ['amean_running_field'] = amean_running_field
    OBJ['amean_filtered_running_field'] = amean_filtered_running_field
    OBJ['min_inst_field'] = min_instantaneous_field
    OBJ['min_running_field'] = min_running_field
    OBJ['min_filtered_running_field'] = min_filtered_running_field
    OBJ['max_inst_field'] = max_instantaneous_field
    OBJ['max_running_field'] = max_running_field
    OBJ['max_filtered_running_field'] = max_filtered_running_field

    # Grid stuff.
    OBJ['grid'] = {}
    OBJ['grid']['lon'] = lon
    OBJ['grid']['lat'] = lat
    OBJ['grid']['area'] = area2d

    return OBJ


def get_objid_datetime(objid, calendar='standard'):
    """
    usage: this_datetime = get_objid_datetime(this_objid, calendar)

    Get the datetime from an objid of form YYYYMMDDHHnnnn.
    """
    ymdh_int = int(np.floor(objid/1e4))
    ymdh_str = str(ymdh_int).zfill(10)
    return str2cftime(ymdh_str, '%Y%m%d%H', calendar)


def read_lp_object_properties(objid, objdir, property_list, verbose=False,
        fmt="/%Y/%m/%Y%m%d/objects_%Y%m%d%H.nc"):

    dt1 = get_objid_datetime(objid)
    fn1 = (objdir + '/' + dt1.strftime(fmt)).replace('///','/').replace('//','/')

    if verbose:
        print(fn1)

    DS1 = Dataset(fn1)
    id1 = DS1['objid'][:]
    idx1, = np.where(np.abs(id1 - objid) < 0.1)

    out_dict = {}
    for prop in property_list:
        out_dict[prop] = to1d(DS1[prop][:][idx1])
    DS1.close()

    return out_dict


def get_latest_lp_object_time(objdir, level=3):
    obj_file_list = sorted(glob.glob((objdir + "/*"*level + "/*.nc")))
    last_obj_file = obj_file_list[-1]
    return dt.datetime.strptime(last_obj_file[-13:-3], "%Y%m%d%H")


##################################################################
######################  Tracking Functions  ######################
##################################################################

def to1d(ndarray_or_ma):
    try:
        fout = ndarray_or_ma.compressed()
    except AttributeError:
        fout = ndarray_or_ma.flatten()
    return fout


def calc_overlapping_points(objid1, objid2, objdir, fmt="/%Y/%m/%Y%m%d/objects_%Y%m%d%H.nc"):

    dt1 = get_objid_datetime(objid1)
    dt2 = get_objid_datetime(objid2)

    fn1 = (objdir + '/' + dt1.strftime(fmt)).replace('///','/').replace('//','/')
    fn2 = (objdir + '/' + dt2.strftime(fmt)).replace('///','/').replace('//','/')

    DS1 = Dataset(fn1)
    id1 = DS1['objid'][:]
    idx1, = np.where(id1 == objid1)
    x1 = to1d(DS1['pixels_x'][:][idx1])
    y1 = to1d(DS1['pixels_y'][:][idx1])
    DS1.close()

    DS2 = Dataset(fn2)
    id2 = DS2['objid'][:]
    idx2, = np.where(id2 == objid2)
    x2 = to1d(DS2['pixels_x'][:][idx2])
    y2 = to1d(DS2['pixels_y'][:][idx2])
    DS2.close()

    xy1 = set(zip(x1, y1))
    xy2 = set(zip(x2, y2))

    overlap = [x in xy2 for x in xy1]

    OUT = (len(x1), len(x2), np.sum(overlap))

    del x1
    del y1
    del x2
    del y2
    del xy1
    del xy2

    return OUT


def init_lpt_graph(dt_list, objdir, min_points=1, fmt="/%Y/%m/%Y%m%d/objects_%Y%m%d%H.nc"):

    G = nx.DiGraph() # Empty graph

    REFTIME = cftime.datetime(1970,1,1,0,0,0, calendar=dt_list[0].calendar) ## Only used internally.
    for this_dt in dt_list:
        print(this_dt)
        fn = (objdir + '/' + this_dt.strftime(fmt)).replace('///','/').replace('//','/')
        print(fn)
        try:
            DS = Dataset(fn)
            try:
                id_list = DS['objid'][:]
                lon = DS['centroid_lon'][:]
                lat = DS['centroid_lat'][:]
                area = DS['area'][:]
                pixels_x = DS['pixels_x'][:]
            except IndexError:
                print('WARNING: No LPO at this time: ' + str(this_dt), flush=True)
                id_list = [] # In case of no LPOs at this time.
            DS.close()

            for ii in range(len(id_list)):
                npts = pixels_x[ii,:].count() # ma.count() for number of non masked values.
                if npts >= min_points:
                    G.add_node(int(id_list[ii]),
                        timestamp=(this_dt - REFTIME).total_seconds(),
                        lon=lon[ii], lat=lat[ii], area=area[ii],
                        pos=(lon[ii], (this_dt - REFTIME).total_seconds()))

        except FileNotFoundError:
            print('WARNING: Missing this file!', flush=True)

    return G


def get_lpo_overlap(dt1, dt2, objdir, min_points=1, fmt="/%Y/%m/%Y%m%d/objects_%Y%m%d%H.nc"):

    ##
    ## Read in LPO masks for current and previous times.
    ##
    fn1 = (objdir + '/' + dt1.strftime(fmt)).replace('///','/').replace('//','/')
    fn2 = (objdir + '/' + dt2.strftime(fmt)).replace('///','/').replace('//','/')

    DS1 = Dataset(fn1, 'r')
    mask1 = DS1['grid_mask'][:]
    objid1 = DS1['objid'][:]
    DS1.close()

    DS2 = Dataset(fn2, 'r')
    mask2 = DS2['grid_mask'][:]
    objid2 = DS2['objid'][:]
    DS2.close()

    ##
    ## Apply minimum size.
    ## Any LPOs smaller than the minimum size get taken out of the mask.
    ## Their mask values get set to -1, and they will not be considered
    ## for overlapping.
    ##
    if min_points > 1:
        sizes = ndimage.sum(1, mask1, range(np.nanmax(mask1)+1))
        for nn in [x for x in range(len(sizes)) if sizes[x] < min_points]:
            mask1[mask1 == nn] = -1
        sizes = ndimage.sum(1, mask2, range(np.nanmax(mask2)+1))
        for nn in [x for x in range(len(sizes)) if sizes[x] < min_points]:
            mask2[mask2 == nn] = -1

    ##
    ## Each overlap must necessarily be one LPO against another single LPO.
    ##
    overlap = np.logical_and(mask1 > -1, mask2 > -1)
    label_im, nb_labels = ndimage.label(overlap)

    ########################################################################
    ## Construct overlapping points "look up table" array.
    ## Then, we will use this array as a look up table for specific LPOs.
    ##
    ##      -----------> objid2
    ##     |
    ##     |
    ##     |
    ##     v
    ##   objid1
    ########################################################################

    overlapping_npoints = np.zeros([len(objid1), len(objid2)])
    overlapping_frac1 = np.zeros([len(objid1), len(objid2)])
    overlapping_frac2 = np.zeros([len(objid1), len(objid2)])

    for nn in range(1, nb_labels+1):
        ## Figure out which LPOs this represents.
        ii = int(ndimage.maximum(mask1, label_im, nn))
        jj = int(ndimage.maximum(mask2, label_im, nn))
        overlapping_npoints[ii,jj] += ndimage.sum(overlap, label_im, nn)
        overlapping_frac1[ii,jj] = overlapping_npoints[ii,jj] / np.sum(mask1 == ii)
        overlapping_frac2[ii,jj] = overlapping_npoints[ii,jj] / np.sum(mask2 == jj)

    ## Prepare outputs.
    OVERLAP = {}
    OVERLAP['npoints'] = overlapping_npoints
    OVERLAP['frac1'] = overlapping_frac1
    OVERLAP['frac2'] = overlapping_frac2

    return OVERLAP


def connect_lpt_graph(G0, options, min_points=1, verbose=False,
        fmt="/%Y/%m/%Y%m%d/objects_%Y%m%d%H.nc"):
    """
    usage: Gnew = connect_lpt_graph(G0, options)

    Connect the LP objects in graph G0 into "simple" LPT groups by adding
    directed edges between LPOs at consecutive times that overlap in space.

    options dictionary entries needed:
        options['objdir']
        options['min_overlap_points']
        options['min_overlap_frac']

    "LPT" is a 2-D "group" array (np.int64) with columns:
        [timestamp, objid, lpt_group_id, begin_point, end_point, split_point]
    -- timestamp = Linux time stamp (e.g., seconds since 00 UTC 1970-1-1)
    -- objid = LP object id (YYYYMMDDHHnnnn)
    -- lpt_group_id = LPT group id; connected LP objects have a common LPT group id.
    -- begin point = 1 if it is the beginning of a track, 0 otherwise.
    -- end point = 1 if no tracks were connected to it, 0 otherwise.
    -- split point = 1 if split detected, 0 otherwise.

    BRANCHES is a 1-D native Python list with native Python int values.
    This is needed because BRANCHES is bitwise, and there can be more than
    64 branches in a group.
    -- branches = bitwise binary, starting from 1 at each branch.
       Mergers will have separate branch numbers.
       Overlapping portions will have multiple branch numbers associated with them.
    """

    # Make copies to avoid immutability weirdness.
    Gnew = G0.copy()

    objdir = options['objdir']

    lpo_id_list = list(G0.nodes())
    datetime_list = [get_objid_datetime(x, options['calendar']) for x in lpo_id_list]
    timestamp_list = [int((x - cftime.datetime(1970,1,1,0,0,0, calendar=datetime_list[0].calendar)).total_seconds())
        for x in datetime_list]

    ## Now, loop through the times.
    unique_timestamp_list = np.unique(timestamp_list)
    for tt in range(1, len(unique_timestamp_list)):

        ## Datetimes for this time and previous time.
        this_timestamp = unique_timestamp_list[tt]
        prev_timestamp = unique_timestamp_list[tt-1]
        this_dt = cftime.datetime(1970,1,1,0,0,0, calendar=options['calendar']) + dt.timedelta(seconds=int(this_timestamp))
        prev_dt = cftime.datetime(1970,1,1,0,0,0, calendar=options['calendar']) + dt.timedelta(seconds=int(prev_timestamp))
        print(this_dt, flush=True)

        ## Get overlap points.
        OVERLAP = get_lpo_overlap(this_dt, prev_dt, objdir, fmt=fmt, min_points=min_points)
        overlapping_npoints = OVERLAP['npoints']
        overlapping_frac1 = OVERLAP['frac1']
        overlapping_frac2 = OVERLAP['frac2']

        ## The indices (e.g., LPT and BRANCHES array rows) for these times.
        this_time_idx, = np.where(timestamp_list == this_timestamp)
        prev_time_idx, = np.where(timestamp_list == prev_timestamp)

        for ii in this_time_idx:
            this_objid = lpo_id_list[ii]
            idx1 = int(str(this_objid)[-4:])

            ## 1) Figure out which "previous time" LPT indices overlap.
            matches = []
            for jj in prev_time_idx:
                prev_objid = lpo_id_list[jj]
                idx2 = int(str(prev_objid)[-4:])
                n_overlap = overlapping_npoints[idx1, idx2]
                frac1 = overlapping_frac1[idx1, idx2]
                frac2 = overlapping_frac2[idx1, idx2]

                if n_overlap >= options['min_overlap_points']:
                    matches.append(jj)
                elif 1.0*frac1 > options['min_overlap_frac']:
                    matches.append(jj)
                elif 1.0*frac2 > options['min_overlap_frac']:
                    matches.append(jj)

            if verbose:
                print(str(this_objid), flush=True)

            ## 2) Link the previous LPT Indices to the group.
            ## If no matches, it will skip this loop.
            for match in matches:
                if verbose:
                    print(' --> with: ' + str(lpo_id_list[match]), flush=True)
                ## Add it as a graph edge.
                Gnew.add_edge(lpo_id_list[match], this_objid)

    return Gnew


def lpt_graph_allow_falling_below_threshold(G, options, min_points=1,
        fmt="/%Y/%m/%Y%m%d/objects_%Y%m%d%H.nc", verbose=False):
    """
    Check duration of "leaf" (e.g., "this") to "root" nodes of other DAGs,
    and connect if less than options['fall_below_threshold_max_hours'].
    """
    objdir = options['objdir']

    # Get connected components of graph.
    CC = list(nx.connected_components(nx.to_undirected(G)))
    SG = [G.subgraph(CC[x]).copy() for x in range(len(CC))]

    for kk in range(len(SG)):
        end_nodes = [x for x in SG[kk].nodes() if SG[kk].out_degree(x) == 0 and SG[kk].in_degree(x) >= 1]
        if len(end_nodes) == 0:
            continue

        for ll in range(len(SG)):
            if ll == kk:
                continue
            begin_nodes = [x for x in SG[ll].nodes() if SG[ll].out_degree(x) >= 1 and SG[ll].in_degree(x) == 0]
            if len(begin_nodes) == 0:
                continue

            for kkkk in end_nodes:
                kkkk_idx = int(str(kkkk)[-4:])
                for llll in begin_nodes:

                    # When end nodes are linked to begin nodes, they are
                    # removed from the list of end/begin nodes.
                    # Therefore, check whether these have already been removed.
                    # If they were already removed, it means the edge was already
                    # added to the graph, and we want to avoid the end node from
                    # potentially being linked to another begin node further upstream.
                    # When this happened, I was getting duplicate LPTs.
                    if not kkkk in end_nodes or not llll in begin_nodes:
                        continue

                    llll_idx = int(str(llll)[-4:])
                    hours_diff = (get_objid_datetime(llll) - get_objid_datetime(kkkk)).total_seconds()/3600.0
                    if hours_diff > 0.1 and hours_diff < options['fall_below_threshold_max_hours'] + 0.1:
                        begin_dt = get_objid_datetime(llll)
                        end_dt = get_objid_datetime(kkkk)

                        OVERLAP = get_lpo_overlap(end_dt, begin_dt, objdir, fmt=fmt, min_points=min_points)
                        overlapping_npoints = OVERLAP['npoints']
                        overlapping_frac1 = OVERLAP['frac1']
                        overlapping_frac2 = OVERLAP['frac2']

                        n_overlap = overlapping_npoints[kkkk_idx, llll_idx]
                        frac1 = overlapping_frac1[kkkk_idx, llll_idx]
                        frac2 = overlapping_frac2[kkkk_idx, llll_idx]

                        if n_overlap >= options['min_overlap_points']:
                            print('Overlap: ' + str(kkkk) + ' --> ' + str(llll) + '!', flush=True)
                            G.add_edge(kkkk, llll)
                            end_nodes.remove(kkkk)
                            begin_nodes.remove(llll)
                        elif 1.0*frac1 > options['min_overlap_frac']:
                            print('Overlap: ' + str(kkkk) + ' --> ' + str(llll) + '!', flush=True)
                            G.add_edge(kkkk, llll)
                            end_nodes.remove(kkkk)
                            begin_nodes.remove(llll)
                        elif 1.0*frac2 > options['min_overlap_frac']:
                            print('Overlap: ' + str(kkkk) + ' --> ' + str(llll) + '!', flush=True)
                            G.add_edge(kkkk, llll)
                            end_nodes.remove(kkkk)
                            begin_nodes.remove(llll)

    return G


def lpt_graph_remove_short_duration_systems(G, min_duration,
        latest_datetime=dt.datetime(3000,1,1,0,0,0)):

    nodes_master_list = list(G.nodes())
    Gundirected = nx.to_undirected(G)

    while len(nodes_master_list) > 0:
        print(('N = ' + str(len(nodes_master_list)) + ' nodes left to check.'), flush=True)
        nodes_component_list = list(nx.node_connected_component(Gundirected, nodes_master_list[0]))

        min_lpo_id = np.nanmin(nodes_component_list)
        max_lpo_id = np.nanmax(nodes_component_list)
        min_dt = get_objid_datetime(min_lpo_id)
        max_dt = get_objid_datetime(max_lpo_id)
        duration = (max_dt - min_dt).total_seconds()/3600.0

        if duration < min_duration - 0.1:
            G.remove_nodes_from(nodes_component_list)

        for x in nodes_component_list:
            nodes_master_list.remove(x)

    return G


def get_short_ends(G):

    Grev = G.reverse() # Reversed graph is used for splits below.

    ## Break into individual paths (e.g., LPT branches).
    roots = []
    leaves = []
    for node in G.nodes:
        if G.in_degree(node) == 0: # it's a root
            roots.append(node)
        elif G.out_degree(node) == 0: # it's a leaf
            leaves.append(node)

    ## Root short ends -- mergers.
    Plist_mergers = []
    for root in roots:
        this_short_end = [root]
        this_node = root
        more_to_do = True
        while more_to_do:
            more_to_do = False
            next_node = list(G[this_node])[0]
            this_short_end.append(next_node)
            if G.in_degree(next_node) == 1 and G.out_degree(next_node) == 1:
                this_node = next_node
                more_to_do = True
        Plist_mergers.append(this_short_end)

    ## Leaf short ends -- splits.
    Plist_splits = []
    for leaf in leaves:
        this_short_end = [leaf]
        this_node = leaf
        more_to_do = True
        while more_to_do:
            more_to_do = False
            next_node = list(Grev[this_node])[0]
            this_short_end.append(next_node)
            if Grev.in_degree(next_node) == 1 and Grev.out_degree(next_node) == 1:
                this_node = next_node
                more_to_do = True
        Plist_splits.append(this_short_end)

    return (Plist_mergers, Plist_splits)


def lpt_graph_remove_short_ends(G, min_duration_to_keep):

    ## Work on each connected component (DAG, directed acyclic graph) separately.
    ## First, I get a list of DAG sub groups to loop through.
    CC = list(nx.connected_components(nx.to_undirected(G)))
    SG = [G.subgraph(CC[x]).copy() for x in range(len(CC))]

    ## Loop over each DAG (LPT Group)
    for kk in range(len(SG)):
        more_to_do = True
        print('--> LPT group ' + str(kk+1) + ' of ' + str(len(SG)), flush=True)

        niter = 0
        while more_to_do:
            niter += 1
            more_to_do = False

            areas = nx.get_node_attributes(SG[kk], 'area') # used for tie breaker if same duration
            Plist_mergers, Plist_splits = get_short_ends(SG[kk])
            print('----> Iteration #'+str(niter)+': Found '+str(len(Plist_mergers))
                +' merge ends and '+str(len(Plist_splits))+' split ends.', flush=True)

            nodes_to_remove = []

            ## Handle mergers.
            if len(Plist_mergers) > 1: # Don't bother if only one root short end.
                merger_datetimes = [get_objid_datetime(x[-1]) for x in Plist_mergers]
                merger_timestamps = np.array([(x - cftime.datetime(1970,1,1,0,0,0, calendar=merger_datetimes[0].calendar)).total_seconds()/3600
                    for x in merger_datetimes])

                for iiii in range(len(Plist_mergers)):
                    path1 = Plist_mergers[iiii]
                    # Don't use the last node, as it intersects the paths I want to keep.
                    dur1 = (get_objid_datetime(path1[-2]) - get_objid_datetime(path1[0])).total_seconds()/3600.0

                    ## Check whether it intersects with any others.
                    override_removal = False
                    others = list(range(len(Plist_mergers)))
                    others.remove(iiii)
                    found_intersecting_short_end = False
                    for jjjj in others:
                        path2 = Plist_mergers[jjjj]
                        if path1[-1] == path2[-1]: # Make sure I am comparing short ends that TOUCH.
                            found_intersecting_short_end = True
                            dur2 = (get_objid_datetime(path2[-2]) - get_objid_datetime(path2[0])).total_seconds()/3600.0
                            if dur1 > dur2:
                                override_removal = True
                            elif dur1 == dur2:
                                ## Tiebreaker is integrated area in time.
                                integrate_area1 = np.nansum([areas[x] for x in path1])
                                integrate_area2 = np.nansum([areas[x] for x in path2])
                                if integrate_area1 >= integrate_area2:
                                    override_removal = True

                    if not found_intersecting_short_end:
                        # Check if it is the earliest merger time.
                        if merger_timestamps[iiii] == np.min(merger_timestamps):
                            override_removal = True

                    if dur1 < min_duration_to_keep - 0.01 and not override_removal:
                        ## NOTE: KC20 code used min_duration_to_keep PLUS 0.1 above.
                        ## Hence, the parameter wasn't strictly the *minimum*
                        ## duration to keep a branch, since short ends
                        ## with duration *exactly* equal to min_duration_to_keep
                        ## would have been discarded.

                        ## Make sure I wouldn't remove any parts of the cycles
                        nodes_to_remove += path1[:-1] # Don't remove the last one. It intersects the paths I want to keep.

            ## Handle splits. NOTE: The ordering here is REVERSED in time.
            if len(Plist_splits) > 1: # Don't bother if only one leaf short end.
                split_datetimes = [get_objid_datetime(x[-1]) for x in Plist_splits]
                split_timestamps = np.array([(x - cftime.datetime(1970,1,1,0,0,0, calendar=split_datetimes[0].calendar)).total_seconds()/3600
                    for x in split_datetimes])

                for iiii in range(len(Plist_splits)):
                    path1 = Plist_splits[iiii]
                    # Don't use the last node, as it intersects the paths I want to keep.
                    dur1 = (get_objid_datetime(path1[0]) - get_objid_datetime(path1[-2])).total_seconds()/3600.0

                    ## Check whether it intersects with any others.
                    override_removal = False
                    others = list(range(len(Plist_splits)))
                    others.remove(iiii)
                    found_intersecting_short_end = False
                    for jjjj in others:
                        path2 = Plist_splits[jjjj]
                        if path1[-1] == path2[-1]: # Make sure I am comparing short ends that TOUCH.
                            # Using index [-1] works here because order is reversed, from get_short_ends.
                            found_intersecting_short_end = True
                            dur2 = (get_objid_datetime(path2[0]) - get_objid_datetime(path2[-2])).total_seconds()/3600.0
                            if dur1 > dur2:
                                override_removal = True
                            elif dur1 == dur2:
                                ## Tiebreaker is integrated area in time.
                                integrate_area1 = np.nansum([areas[x] for x in path1])
                                integrate_area2 = np.nansum([areas[x] for x in path2])
                                if integrate_area1 >= integrate_area2:
                                    override_removal = True

                    if not found_intersecting_short_end:
                        # Check if it is the latest split time.
                        if split_timestamps[iiii] == np.max(split_timestamps):
                            override_removal = True

                    if dur1 < min_duration_to_keep + 0.1 and not override_removal:
                        ## Make sure I wouldn't remove any parts of the cycles
                        nodes_to_remove += path1[:-1] # Don't remove the last one. It intersects the paths I want to keep.

            if len(nodes_to_remove) > 0:
                G.remove_nodes_from(nodes_to_remove)
                SG[kk].remove_nodes_from(nodes_to_remove)

                ## In some situations, the branch removal process leaves behind "isolates"
                ## -- nodes without any neighbors! This has occurred in the following situation:
                ##
                ##              *
                ##             *
                ##            *  ^
                ##           *  *  *
                ##          *  *    *
                ##            *      *
                ##           *
                ##          *
                ##         *
                ##
                ## Where ^ is the node that was left "stranded", e.g., the isolate.
                ## To handle this, remove any isolates from main graph and current sub graph.
                isolates_list = list(nx.isolates(SG[kk]))
                if len(isolates_list) > 0:
                    G.remove_nodes_from(isolates_list)
                    SG[kk].remove_nodes_from(isolates_list)

                more_to_do = True

    return G


def initialize_time_cluster_fields(TC, length):

    ## Fields initialized to zero.
    for field in ['nobj','area','centroid_lon','centroid_lat'
            ,'largest_object_centroid_lon','largest_object_centroid_lat'
            ,'amean_inst_field','amean_running_field','amean_filtered_running_field']:
        TC[field] = np.zeros(length)

    ## Fields initialized to 999.0.
    for field in ['min_lon','min_lat','min_inst_field','min_running_field'
            ,'min_filtered_running_field']:
        TC[field] = 999.0 * np.ones(length)

    ## Fields initialized to -999.0.
    for field in ['max_lon','max_lat','max_inst_field','max_running_field'
            ,'max_filtered_running_field']:
        TC[field] = -999.0 * np.ones(length)

    return TC


def calc_lpt_properties_without_branches(G, options, fmt="/%Y/%m/%Y%m%d/objects_%Y%m%d%H.nc"):
    ## The branch nodes of G have the properties of timestamp, lon, lat, and area.

    TC_all = []

    ## First, I get a list of DAG sub groups to loop through.
    CC = list(nx.connected_components(nx.to_undirected(G)))
    SG = [G.subgraph(CC[x]).copy() for x in range(len(CC))]

    ## Loop over each DAG
    for kk in range(len(SG)):
        print('--> LPT group ' + str(kk+1) + ' of ' + str(len(SG)), flush=True)

        TC_this = {}
        TC_this['lpt_group_id'] = kk
        TC_this['lpt_id'] = 1.0*kk
        TC_this['objid'] = sorted(list(SG[kk].nodes()))
        ts = nx.get_node_attributes(SG[kk], 'timestamp')
        timestamp_all = [ts[x] for x in TC_this['objid']]
        TC_this['timestamp'] = np.unique(timestamp_all)
        TC_this['datetime'] = [cftime.datetime(1970,1,1,0,0,0, calendar=options['calendar'])
            + dt.timedelta(seconds=int(x)) for x in TC_this['timestamp']]

        ##
        ## Sum/average the LPTs to get bulk/mean properties at each time.
        ##

        ## Initialize
        TC_this = initialize_time_cluster_fields(TC_this, len(TC_this['timestamp']))

        ## Loop over unique time stamps.
        for tt in range(len(TC_this['timestamp'])):
            max_area_already_used = -999.0
            this_objid_list = [TC_this['objid'][x] for x in range(len(TC_this['objid']))
                if timestamp_all[x] == TC_this['timestamp'][tt]]

            for this_objid in this_objid_list:

                OBJ = read_lp_object_properties(this_objid, options['objdir'],
                    ['centroid_lon','centroid_lat','area','pixels_x','pixels_y'
                    ,'min_lon','max_lon','min_lat','max_lat'
                    ,'amean_inst_field','amean_running_field','max_inst_field','max_running_field'
                    ,'min_inst_field','min_running_field','min_filtered_running_field'
                    ,'amean_filtered_running_field','max_filtered_running_field'], fmt=fmt)

                TC_this['nobj'][tt] += 1
                TC_this['area'][tt] += OBJ['area']
                TC_this['centroid_lon'][tt] += OBJ['centroid_lon'] * OBJ['area']
                TC_this['centroid_lat'][tt] += OBJ['centroid_lat'] * OBJ['area']
                if OBJ['area'] > max_area_already_used:
                    TC_this['largest_object_centroid_lon'][tt] = 1.0*OBJ['centroid_lon']
                    TC_this['largest_object_centroid_lat'][tt] = 1.0*OBJ['centroid_lat']
                    max_area_already_used = 1.0*OBJ['area']
                TC_this['min_lon'][tt] = min((TC_this['min_lon'][tt], OBJ['min_lon']))
                TC_this['min_lat'][tt] = min((TC_this['min_lat'][tt], OBJ['min_lat']))
                TC_this['max_lon'][tt] = max((TC_this['max_lon'][tt], OBJ['max_lon']))
                TC_this['max_lat'][tt] = max((TC_this['max_lat'][tt], OBJ['max_lat']))
                TC_this['amean_inst_field'][tt] += OBJ['amean_inst_field'] * OBJ['area']
                TC_this['amean_running_field'][tt] += OBJ['amean_running_field'] * OBJ['area']
                TC_this['amean_filtered_running_field'][tt] += OBJ['amean_filtered_running_field'] * OBJ['area']
                TC_this['min_inst_field'][tt] = min((TC_this['min_inst_field'][tt], OBJ['min_inst_field']))
                TC_this['min_running_field'][tt] = min((TC_this['min_running_field'][tt], OBJ['min_running_field']))
                TC_this['min_filtered_running_field'][tt] = min((TC_this['min_filtered_running_field'][tt], OBJ['min_filtered_running_field']))
                TC_this['max_inst_field'][tt] = max((TC_this['max_inst_field'][tt], OBJ['max_inst_field']))
                TC_this['max_running_field'][tt] = max((TC_this['max_running_field'][tt], OBJ['max_running_field']))
                TC_this['max_filtered_running_field'][tt] = max((TC_this['max_filtered_running_field'][tt], OBJ['max_filtered_running_field']))

            TC_this['centroid_lon'][tt] /= TC_this['area'][tt]
            TC_this['centroid_lat'][tt] /= TC_this['area'][tt]
            TC_this['amean_inst_field'][tt] /= TC_this['area'][tt]
            TC_this['amean_running_field'][tt] /= TC_this['area'][tt]
            TC_this['amean_filtered_running_field'][tt] /= TC_this['area'][tt]

        ## Least squares linear fit for propagation speed.
        Pzonal = np.polyfit(TC_this['timestamp'], TC_this['centroid_lon'], 1)
        TC_this['zonal_propagation_speed'] = Pzonal[0] * 111000.0  # deg/s --> m/s

        Pmeridional = np.polyfit(TC_this['timestamp'], TC_this['centroid_lat'], 1)
        TC_this['meridional_propagation_speed'] = Pmeridional[0] * 111000.0  # deg/s --> m/s

        TC_all.append(TC_this)

    return TC_all


def get_list_of_path_graphs(G):

    Plist = [] # initialize empty list.

    ## Break into individual paths (e.g., LPT branches).
    roots = []
    leaves = []
    for node in G.nodes:
        if G.in_degree(node) == 0: # it's a root
            roots.append(node)
        elif G.out_degree(node) == 0: # it's a leaf
            leaves.append(node)

    iii = -1
    for root in roots:
        iii += 1
        print(('    root ' + str(iii) + ' of max ' + str(len(roots)-1) + '.'), flush=True)
        for leaf in leaves:
            for path in nx.all_simple_paths(G, source=root, target=leaf):
                Plist.append(G.subgraph(path).copy()) # Add to list.

    return Plist


def get_list_of_path_graphs_rejoin_cycles(G):

    #########################################################
    ## 1. For any path intersecting with a cycle, add all nodes of the cycle.
    ## 2. Remove duplicate paths.
    #########################################################

    cycles = nx.cycle_basis(nx.to_undirected(G))
    if len(cycles) > 1:
        ## In some cases, there are cycles that touch each other. Cycles that
        ## touch are treated together all at once, and hence need to be combined.
        for cc in range(len(cycles)):
            for cccc in range(len(cycles)):
                if cccc == cc:
                    continue
                if len(set(cycles[cc]).intersection(set(cycles[cccc]))) > 0:
                    cycles[cc] = list(set(cycles[cc]).union(set(cycles[cccc])))
                    cycles[cccc] = list(set(cycles[cc]).union(set(cycles[cccc])))

    Plist = get_list_of_path_graphs(G)

    for ii in range(len(Plist)):
        for C in cycles:
            intersection_nodes = set(C).intersection(set(Plist[ii].nodes()))
            intersection_times = [get_objid_datetime(x) for x in intersection_nodes]
            if len(intersection_nodes) > 0:
                Cadd = []
                for this_node in C:
                    if get_objid_datetime(this_node) in intersection_times:
                        Cadd.append(this_node)
                Plist[ii].add_nodes_from(Cadd) # Only nodes are copied, not edges. But that's all I need.

    ## This step eliminates duplicates.
    for ii in range(len(Plist)):
        if ii == 0:
            Plist_new = [Plist[ii]]
        else:
            ## Check whether it is already in Plist_new.
            ## Use XOR on the set of nodes.
            include_it = True
            for P in Plist_new:
                if len(set(Plist[ii].nodes()) ^ set(P.nodes())) == 0:
                    include_it = False
                    break
            if include_it:
                Plist_new.append(Plist[ii])

    return Plist_new


def calc_lpt_properties_with_branches(G, options, fmt="/%Y/%m/%Y%m%d/objects_%Y%m%d%H.nc"):
    ## The branch nodes of G have the properties of timestamp, lon, lat, and area.

    TC_all = []

    ## First, I get a list of DAG sub groups to loop through.
    CC = list(nx.connected_components(nx.to_undirected(G)))
    SG = [G.subgraph(CC[x]).copy() for x in range(len(CC))]

    ## Loop over each DAG
    for kk in range(len(SG)):
        print('--> LPT group ' + str(kk+1) + ' of ' + str(len(SG)), flush=True)

        #Plist = get_list_of_path_graphs(SG[kk])
        Plist = get_list_of_path_graphs_rejoin_cycles(SG[kk])

        if len(Plist) == 1:
            print('----> Found '+str(len(Plist))+' LPT system.', flush=True)
        else:
            print('----> Found '+str(len(Plist))+' LPT systems.', flush=True)

        ## Get "timeclusters" for each branch.
        for iiii in range(len(Plist)):
            path1 = Plist[iiii]
            PG = SG[kk].subgraph(path1).copy()

            TC_this = {}
            TC_this['lpt_group_id'] = kk
            TC_this['lpt_id'] = 1.0*kk + (iiii+1)/max(10.0, np.power(10, np.ceil(np.log10(len(Plist)))))
            ## ^ I should probably account for possible cycles here.
            TC_this['objid'] = sorted(list(PG.nodes()))
            ts = nx.get_node_attributes(PG, 'timestamp')
            timestamp_all = [ts[x] for x in TC_this['objid']]
            TC_this['timestamp'] = np.unique(timestamp_all)
            TC_this['datetime'] = [cftime.datetime(1970,1,1,0,0,0, calendar=options['calendar'])
                + dt.timedelta(seconds=int(x)) for x in TC_this['timestamp']]

            ##
            ## Sum/average the LPTs to get bulk/mean properties at each time.
            ##

            ## Initialize
            TC_this = initialize_time_cluster_fields(TC_this, len(TC_this['timestamp']))

            ## Loop over unique time stamps.
            for tt in range(len(TC_this['timestamp'])):
                max_area_already_used = -999.0
                this_objid_list = [TC_this['objid'][x] for x in range(len(TC_this['objid']))
                    if timestamp_all[x] == TC_this['timestamp'][tt]]

                for this_objid in this_objid_list:

                    OBJ = read_lp_object_properties(this_objid, options['objdir'],
                        ['centroid_lon','centroid_lat','area','pixels_x','pixels_y'
                        ,'min_lon','max_lon','min_lat','max_lat'
                        ,'amean_inst_field','amean_running_field','max_inst_field','max_running_field'
                        ,'min_inst_field','min_running_field','min_filtered_running_field'
                        ,'amean_filtered_running_field','max_filtered_running_field'], fmt=fmt)

                    TC_this['nobj'][tt] += 1
                    TC_this['area'][tt] += OBJ['area']
                    TC_this['centroid_lon'][tt] += OBJ['centroid_lon'] * OBJ['area']
                    TC_this['centroid_lat'][tt] += OBJ['centroid_lat'] * OBJ['area']
                    if OBJ['area'] > max_area_already_used:
                        TC_this['largest_object_centroid_lon'][tt] = 1.0*OBJ['centroid_lon']
                        TC_this['largest_object_centroid_lat'][tt] = 1.0*OBJ['centroid_lat']
                        max_area_already_used = 1.0*OBJ['area']
                    TC_this['min_lon'][tt] = min((TC_this['min_lon'][tt], OBJ['min_lon']))
                    TC_this['min_lat'][tt] = min((TC_this['min_lat'][tt], OBJ['min_lat']))
                    TC_this['max_lon'][tt] = max((TC_this['max_lon'][tt], OBJ['max_lon']))
                    TC_this['max_lat'][tt] = max((TC_this['max_lat'][tt], OBJ['max_lat']))
                    TC_this['amean_inst_field'][tt] += OBJ['amean_inst_field'] * OBJ['area']
                    TC_this['amean_running_field'][tt] += OBJ['amean_running_field'] * OBJ['area']
                    TC_this['amean_filtered_running_field'][tt] += OBJ['amean_filtered_running_field'] * OBJ['area']
                    TC_this['min_inst_field'][tt] = min((TC_this['min_inst_field'][tt], OBJ['min_inst_field']))
                    TC_this['min_running_field'][tt] = min((TC_this['min_running_field'][tt], OBJ['min_running_field']))
                    TC_this['min_filtered_running_field'][tt] = min((TC_this['min_filtered_running_field'][tt], OBJ['min_filtered_running_field']))
                    TC_this['max_inst_field'][tt] = max((TC_this['max_inst_field'][tt], OBJ['max_inst_field']))
                    TC_this['max_running_field'][tt] = max((TC_this['max_running_field'][tt], OBJ['max_running_field']))
                    TC_this['max_filtered_running_field'][tt] = max((TC_this['max_filtered_running_field'][tt], OBJ['max_filtered_running_field']))

                TC_this['centroid_lon'][tt] /= TC_this['area'][tt]
                TC_this['centroid_lat'][tt] /= TC_this['area'][tt]
                TC_this['amean_inst_field'][tt] /= TC_this['area'][tt]
                TC_this['amean_running_field'][tt] /= TC_this['area'][tt]
                TC_this['amean_filtered_running_field'][tt] /= TC_this['area'][tt]

            ## Least squares linear fit for propagation speed.
            Pzonal = np.polyfit(TC_this['timestamp'], TC_this['centroid_lon'], 1)
            TC_this['zonal_propagation_speed'] = Pzonal[0] * 111000.0  # deg/s --> m/s

            Pmeridional = np.polyfit(TC_this['timestamp'], TC_this['centroid_lat'], 1)
            TC_this['meridional_propagation_speed'] = Pmeridional[0] * 111000.0  # deg/s --> m/s

            TC_all.append(TC_this)

    return TC_all


###################################################
###  Other processing functions.
###################################################

def get_lpo_mask(objid, objdir):

    dt1 = get_objid_datetime(objid)

    fmt = ("/%Y/%m/%Y%m%d/objects_%Y%m%d%H.nc")
    fn1 = (objdir + '/' + dt1.strftime(fmt)).replace('///','/').replace('//','/')

    DS1 = Dataset(fn1)
    id1 = DS1['objid'][:]
    idx1, = np.where(np.abs(id1 - objid) < 0.1)

    x1 = DS1['pixels_x'][:][idx1].compressed()
    y1 = DS1['pixels_y'][:][idx1].compressed()
    lon = DS1['grid_lon'][:]
    lat = DS1['grid_lat'][:]
    DS1.close()

    mask = np.zeros([len(lat), len(lon)])
    mask[y1, x1] = 1

    return (lon, lat, mask)


def plot_lpt_groups_time_lon_text(ax, LPT, BRANCHES, options, text_color='k'):

    objdir = options['objdir']

    dt_min = dt.datetime(1970,1,1,0,0,0) + dt.timedelta(seconds=int(np.min(LPT[:,0])))
    dt_max = dt.datetime(1970,1,1,0,0,0) + dt.timedelta(seconds=int(np.max(LPT[:,0])))

    for ii in range(len(LPT[:,0])):
        objid = LPT[ii,1]
        dt1 = get_objid_datetime(objid)

        fmt = ("/%Y/%m/%Y%m%d/objects_%Y%m%d%H.nc")
        fn1 = (objdir + '/' + dt1.strftime(fmt)).replace('///','/').replace('//','/')

        DS1 = Dataset(fn1)
        id1 = DS1['objid'][:]
        idx1, = np.where(np.abs(id1 - objid) < 0.1)
        lon = DS1['centroid_lon'][:][idx1]
        DS1.close()

        this_text_color = text_color
        this_zorder = 10
        if (LPT[ii,3] == 1):
            this_text_color = 'b'
            this_zorder = 20
        if (LPT[ii,4] == 1):
            this_text_color = 'm'
            this_zorder = 20
        if (LPT[ii,5] == 1):
            this_text_color = 'g'
            this_zorder = 20

        # (branches_binary_str4 is assumed to be provided elsewhere in the LPT code base.)
        plt.text(lon, dt1, (str(LPT[ii,2]) + ": " + branches_binary_str4(BRANCHES[ii])),
            color=this_text_color, zorder=this_zorder, fontsize=6, clip_on=True)

    ax.set_xlim([0.0, 360.0])
    ax.set_ylim([dt_min, dt_max + dt.timedelta(hours=3)])


def float_lpt_id(group, branch):
    """
    Branch is a decimal tacked on to the group ID.
    group 7, branch #1 is 7.01
    group 20, branch 10 is 20.10
    Branch > 99 will give an error message and return np.nan.
    """
    if branch > 99:
        print('ERROR! Branch number > 99.')
        float_id = np.nan
    else:
        float_id = group + branch / 100.0

    return float_id


def plot_timeclusters_time_lon(ax, TIMECLUSTERS, linewidth=2.0):

    for ii in range(len(TIMECLUSTERS)):
        x = TIMECLUSTERS[ii]['centroid_lon']
        y = TIMECLUSTERS[ii]['datetime']
        ax.plot(x, y, 'k', linewidth=linewidth)

        plt.text(x[0], y[0], str(int(ii)), fontweight='bold', color='red', clip_on=True)
        plt.text(x[-1], y[-1], str(int(ii)), fontweight='bold', color='red', clip_on=True)
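

## --------------------------------------------------------------------------
## (Hedged end-to-end sketch, not part of the original module. The option keys
## mirror those referenced above, but the directory, thresholds, and dates are
## made up for illustration only.)
##
## if __name__ == '__main__':
##     options = {'objdir': '/path/to/objects', 'calendar': 'standard',
##                'min_overlap_points': 1600, 'min_overlap_frac': 0.5,
##                'fall_below_threshold_max_hours': 0}
##     dt_list = dtrange(str2cftime('2021010100', '%Y%m%d%H', 'standard'),
##                       str2cftime('2021020100', '%Y%m%d%H', 'standard'), 3)
##     G = init_lpt_graph(dt_list, options['objdir'])
##     G = connect_lpt_graph(G, options)
##     G = lpt_graph_remove_short_duration_systems(G, 7*24)
##     TC_all = calc_lpt_properties_with_branches(G, options)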