## Convert segments to transects
def seg2trans(seg, dist2coast, dist2trnsc,
              lsmnc="/bog/amuttaqin/Datasets/ERA5-Land/land-sea-mask/lsm.nc"):
    """Convert coastline segments into cross-shore transects.

    Parameters
    ----------
    seg : pandas.DataFrame
        Segment endpoints with columns "lat1", "lon1", "lat2", "lon2".
    dist2coast : float
        Offset of each transect endpoint from the coastline (degrees lat/lon).
    dist2trnsc : float
        Spacing between consecutive transects along a segment (degrees lat/lon).
    lsmnc : str, optional
        Path to the land-sea-mask netCDF file used for quality control.
        Defaults to the previously hardcoded ERA5-Land path, so existing
        callers are unaffected.

    Returns
    -------
    transects : list of [lat1, lon1, lat2, lon2]
        Every raw transect generated, before quality control.
    transects_qc : pandas.DataFrame
        Quality-controlled transects: endpoint 1 over sea (lsm < 0.5) and
        endpoint 2 over land (lsm >= 0.5), with the 1-based index of the
        originating segment in column "segment_index".
    """
    from shapely.geometry import LineString
    import numpy as np
    import pandas as pd
    import xarray as xr

    segments = seg[["lat1", "lon1", "lat2", "lon2"]].to_numpy().tolist()
    transects = []
    segidx = []

    for s in range(len(segments)):
        lat1, lon1, lat2, lon2 = segments[s]

        # Line aligned with the coastline ("segment"). Coordinates are given
        # as (lat, lon), so point.x is latitude and point.y is longitude.
        segment = LineString([(lat1, lon1), (lat2, lon2)])

        # How many transects fit on this segment?
        n_trnsc = np.floor(segment.length / dist2trnsc)

        # Two parallel offsets, one on each side of the segment: one ends up
        # over landmass, the other over ocean.
        segpl = segment.parallel_offset(dist2coast, 'left')
        segpr = segment.parallel_offset(dist2coast, 'right')

        # Centre the set of transects on the segment: split any leftover
        # length evenly between the two ends.
        excess = segpl.length - (dist2trnsc * (n_trnsc - 1))
        start_dist = excess / 2
        distances = np.arange(start_dist, segpl.length, dist2trnsc)

        ptsl = [segpl.interpolate(distance) for distance in distances]
        ptsr = [segpr.interpolate(distance) for distance in distances]

        list_ptsl = [[round(pl.x, 4), round(pl.y, 4)] for pl in ptsl]
        list_ptsr = [[round(pr.x, 4), round(pr.y, 4)] for pr in ptsr]
        # parallel_offset can return the right-hand offset with reversed
        # orientation; sorting both lists re-aligns left/right point pairs.
        list_ptsl.sort()
        list_ptsr.sort()

        # Keep only the first n_trnsc pairs (np.arange may yield one extra).
        for i in range(int(n_trnsc)):
            segidx.append(s + 1)
            transects.append(list_ptsl[i] + list_ptsr[i])

    df2 = pd.DataFrame(transects, columns=['lat1', 'lon1', 'lat2', 'lon2'])
    df2["segment_index"] = segidx

    # Quality control of transects: sample the land-sea mask at both ends.
    lsm = xr.open_dataset(lsmnc).squeeze(dim="time", drop=True)

    # A shared 'transect' dimension makes .sel pair coordinates pointwise
    # instead of forming an outer product (xarray vectorized indexing).
    lat1 = xr.DataArray(df2["lat1"].to_numpy().tolist(), dims='transect')
    lon1 = xr.DataArray(df2["lon1"].to_numpy().tolist(), dims='transect')
    lat2 = xr.DataArray(df2["lat2"].to_numpy().tolist(), dims='transect')
    lon2 = xr.DataArray(df2["lon2"].to_numpy().tolist(), dims='transect')

    lsm1 = lsm.sel(lat=lat1, lon=lon1, method="nearest")
    lsm2 = lsm.sel(lat=lat2, lon=lon2, method="nearest")

    df2["lsm1"] = lsm1["lsm"].to_numpy().tolist()
    df2["lsm2"] = lsm2["lsm"].to_numpy().tolist()

    # Discard transects lying entirely over land or entirely over sea.
    lands = df2[(df2["lsm1"] >= 0.5) & (df2["lsm2"] >= 0.5)]
    seas = df2[(df2["lsm1"] < 0.5) & (df2["lsm2"] < 0.5)]
    df2 = df2.drop(lands.index)
    df2 = df2.drop(seas.index)

    # Expected orientation: endpoint 1 over sea, endpoint 2 over land.
    # Swap the endpoints of any transect oriented the other way round.
    expcond = (df2["lsm1"] < 0.5) & (df2["lsm2"] >= 0.5)
    df2.loc[~expcond, ["lat1", "lon1", "lat2", "lon2", "lsm1", "lsm2"]] = (
        df2.loc[~expcond, ["lat2", "lon2", "lat1", "lon1", "lsm2", "lsm1"]].values)

    transects_qc = df2

    return transects, transects_qc
## Subset data and convert to netcdf
def load_reg_nc(filein, region,
                bboxcsv='/bog/amuttaqin/Datasets/Derived/regional_bbox.csv'):
    """Load a regional subset of a GRIB dataset as netCDF, caching on disk.

    Looks for an existing ``<filein><region>.nc``; if absent, builds it from
    ``<filein><region>.grib`` (or, failing that, subsets the parent GRIB file
    with CDO's sellonlatbox) and compresses it to netCDF4.

    Parameters
    ----------
    filein : str
        Path prefix of the dataset; the parent GRIB file is assumed to be
        ``filein[:-1] + '.grib'`` (i.e. the prefix minus a trailing separator).
    region : str
        Region name; must be an index value ("mainland") in the bbox CSV.
    bboxcsv : str, optional
        CSV of regional bounding boxes with columns mainland, lon1, lon2,
        lat1, lat2. Defaults to the previously hardcoded path.

    Returns
    -------
    xarray.Dataset
        The regional dataset opened from the (possibly newly created) netCDF.
    """
    import os
    import xarray as xr
    import pandas as pd
    from cdo import Cdo
    cdo = Cdo()

    # Bounding box for the requested region.
    dfb = pd.read_csv(bboxcsv, sep=',').set_index("mainland")
    lon1 = dfb.at[region, "lon1"]
    lon2 = dfb.at[region, "lon2"]
    lat1 = dfb.at[region, "lat1"]
    lat2 = dfb.at[region, "lat2"]

    filenc = filein + region + '.nc'
    filegrb = filein + region + '.grib'

    # Plain file-existence checks; the metview dependency the original used
    # for mv.exist() is unnecessary here.
    if not os.path.exists(filenc):
        if not os.path.exists(filegrb):
            # Subset the parent GRIB file to the regional bounding box.
            cdo.sellonlatbox(lon1, lon2, lat1, lat2,
                             input=filein[:-1] + '.grib', output=filegrb)
        # Convert GRIB -> compressed netCDF4.
        cdo.copy(input=filegrb, options='-f nc4c -z zip', output=filenc)

    fileout = xr.open_dataset(filenc)
    return fileout
def binscatter(**kwargs):
    """Run a binned-scatter regression and return a tidy estimates frame.

    Keyword arguments are forwarded verbatim to ``binsreg.binsreg`` (notably
    ``x``, ``y``, ``data``, and optionally ``ci`` and ``by``).

    Returns
    -------
    pandas.DataFrame
        Dot estimates renamed to the original x/y column names; when ``ci``
        is requested, a ``ci`` column with the confidence-interval width;
        when ``by`` is used, a group column renamed to the grouping variable.
    """
    # Estimate binsreg
    est = binsreg.binsreg(**kwargs)

    # Retrieve dot estimates and restore the caller's column names.
    df_est = pd.concat([d.dots for d in est.data_plot])
    df_est = df_est.rename(columns={'x': kwargs.get("x"), 'fit': kwargs.get("y")})

    # Add confidence intervals (width of the CI band per bin).
    if "ci" in kwargs:
        df_est = pd.merge(df_est, pd.concat([d.ci for d in est.data_plot]))
        df_est = df_est.drop(columns=['x'])
        df_est['ci'] = df_est['ci_r'] - df_est['ci_l']

    # Rename groups. Bug fix: the original read a global `data`, which
    # raises NameError on a fresh kernel — the DataFrame is in kwargs.
    if "by" in kwargs:
        df_est['group'] = df_est['group'].astype(
            kwargs.get("data")[kwargs.get("by")].dtype)
        df_est = df_est.rename(columns={'group': kwargs.get("by")})

    return df_est
"var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 5 }