## Convert segments to transects
def seg2trans(seg, dist2coast, dist2trnsc,
              lsmnc="/bog/amuttaqin/Datasets/ERA5-Land/land-sea-mask/lsm.nc"):
    """Convert coastline segments into cross-shore transects.

    Parameters
    ----------
    seg : pandas.DataFrame
        Segment endpoints with columns "lat1", "lon1", "lat2", "lon2".
    dist2coast : float
        Offset of each transect endpoint from the coastline (degrees lat/lon).
    dist2trnsc : float
        Spacing between consecutive transects along a segment (degrees lat/lon).
    lsmnc : str, optional
        Path to the land-sea-mask netCDF file used for quality control.
        Defaults to the previously hardcoded ERA5-Land path, so existing
        callers are unaffected.

    Returns
    -------
    transects : list of [lat1, lon1, lat2, lon2]
        Every raw transect generated, before quality control.
    transects_qc : pandas.DataFrame
        Quality-controlled transects: endpoint 1 over sea (lsm < 0.5) and
        endpoint 2 over land (lsm >= 0.5), with the 1-based index of the
        originating segment in column "segment_index".
    """
    from shapely.geometry import LineString
    import numpy as np
    import pandas as pd
    import xarray as xr

    segments = seg[["lat1", "lon1", "lat2", "lon2"]].to_numpy().tolist()
    transects = []
    segidx = []

    for s in range(len(segments)):
        lat1, lon1, lat2, lon2 = segments[s]

        # Line aligned with the coastline ("segment"). Coordinates are given
        # as (lat, lon), so point.x is latitude and point.y is longitude.
        segment = LineString([(lat1, lon1), (lat2, lon2)])

        # How many transects fit on this segment?
        n_trnsc = np.floor(segment.length / dist2trnsc)

        # Two parallel offsets, one on each side of the segment: one ends up
        # over landmass, the other over ocean.
        segpl = segment.parallel_offset(dist2coast, 'left')
        segpr = segment.parallel_offset(dist2coast, 'right')

        # Centre the set of transects on the segment: split any leftover
        # length evenly between the two ends.
        excess = segpl.length - (dist2trnsc * (n_trnsc - 1))
        start_dist = excess / 2
        distances = np.arange(start_dist, segpl.length, dist2trnsc)

        ptsl = [segpl.interpolate(distance) for distance in distances]
        ptsr = [segpr.interpolate(distance) for distance in distances]

        list_ptsl = [[round(pl.x, 4), round(pl.y, 4)] for pl in ptsl]
        list_ptsr = [[round(pr.x, 4), round(pr.y, 4)] for pr in ptsr]
        # parallel_offset can return the right-hand offset with reversed
        # orientation; sorting both lists re-aligns left/right point pairs.
        list_ptsl.sort()
        list_ptsr.sort()

        # Keep only the first n_trnsc pairs (np.arange may yield one extra).
        for i in range(int(n_trnsc)):
            segidx.append(s + 1)
            transects.append(list_ptsl[i] + list_ptsr[i])

    df2 = pd.DataFrame(transects, columns=['lat1', 'lon1', 'lat2', 'lon2'])
    df2["segment_index"] = segidx

    # Quality control of transects: sample the land-sea mask at both ends.
    lsm = xr.open_dataset(lsmnc).squeeze(dim="time", drop=True)

    # A shared 'transect' dimension makes .sel pair coordinates pointwise
    # instead of forming an outer product (xarray vectorized indexing).
    lat1 = xr.DataArray(df2["lat1"].to_numpy().tolist(), dims='transect')
    lon1 = xr.DataArray(df2["lon1"].to_numpy().tolist(), dims='transect')
    lat2 = xr.DataArray(df2["lat2"].to_numpy().tolist(), dims='transect')
    lon2 = xr.DataArray(df2["lon2"].to_numpy().tolist(), dims='transect')

    lsm1 = lsm.sel(lat=lat1, lon=lon1, method="nearest")
    lsm2 = lsm.sel(lat=lat2, lon=lon2, method="nearest")

    df2["lsm1"] = lsm1["lsm"].to_numpy().tolist()
    df2["lsm2"] = lsm2["lsm"].to_numpy().tolist()

    # Discard transects lying entirely over land or entirely over sea.
    lands = df2[(df2["lsm1"] >= 0.5) & (df2["lsm2"] >= 0.5)]
    seas = df2[(df2["lsm1"] < 0.5) & (df2["lsm2"] < 0.5)]
    df2 = df2.drop(lands.index)
    df2 = df2.drop(seas.index)

    # Expected orientation: endpoint 1 over sea, endpoint 2 over land.
    # Swap the endpoints of any transect oriented the other way round.
    expcond = (df2["lsm1"] < 0.5) & (df2["lsm2"] >= 0.5)
    df2.loc[~expcond, ["lat1", "lon1", "lat2", "lon2", "lsm1", "lsm2"]] = (
        df2.loc[~expcond, ["lat2", "lon2", "lat1", "lon1", "lsm2", "lsm1"]].values)

    transects_qc = df2

    return transects, transects_qc
## Subset data and convert to netcdf
def load_reg_nc(filein, region,
                bboxcsv='/bog/amuttaqin/Datasets/Derived/regional_bbox.csv'):
    """Load a regional subset of a GRIB dataset as netCDF, caching on disk.

    Looks for an existing ``<filein><region>.nc``; if absent, builds it from
    ``<filein><region>.grib`` (or, failing that, subsets the parent GRIB file
    with CDO's sellonlatbox) and compresses it to netCDF4.

    Parameters
    ----------
    filein : str
        Path prefix of the dataset; the parent GRIB file is assumed to be
        ``filein[:-1] + '.grib'`` (i.e. the prefix minus a trailing separator).
    region : str
        Region name; must be an index value ("mainland") in the bbox CSV.
    bboxcsv : str, optional
        CSV of regional bounding boxes with columns mainland, lon1, lon2,
        lat1, lat2. Defaults to the previously hardcoded path.

    Returns
    -------
    xarray.Dataset
        The regional dataset opened from the (possibly newly created) netCDF.
    """
    import os
    import xarray as xr
    import pandas as pd
    from cdo import Cdo
    cdo = Cdo()

    # Bounding box for the requested region.
    dfb = pd.read_csv(bboxcsv, sep=',').set_index("mainland")
    lon1 = dfb.at[region, "lon1"]
    lon2 = dfb.at[region, "lon2"]
    lat1 = dfb.at[region, "lat1"]
    lat2 = dfb.at[region, "lat2"]

    filenc = filein + region + '.nc'
    filegrb = filein + region + '.grib'

    # Plain file-existence checks; the metview dependency the original used
    # for mv.exist() is unnecessary here.
    if not os.path.exists(filenc):
        if not os.path.exists(filegrb):
            # Subset the parent GRIB file to the regional bounding box.
            cdo.sellonlatbox(lon1, lon2, lat1, lat2,
                             input=filein[:-1] + '.grib', output=filegrb)
        # Convert GRIB -> compressed netCDF4.
        cdo.copy(input=filegrb, options='-f nc4c -z zip', output=filenc)

    fileout = xr.open_dataset(filenc)
    return fileout
def binscatter(**kwargs):
    """Run a binned-scatter regression and return a tidy estimates frame.

    Keyword arguments are forwarded verbatim to ``binsreg.binsreg`` (notably
    ``x``, ``y``, ``data``, and optionally ``ci`` and ``by``).

    Returns
    -------
    pandas.DataFrame
        Dot estimates renamed to the original x/y column names; when ``ci``
        is requested, a ``ci`` column with the confidence-interval width;
        when ``by`` is used, a group column renamed to the grouping variable.
    """
    # Estimate binsreg
    est = binsreg.binsreg(**kwargs)

    # Retrieve dot estimates and restore the caller's column names.
    df_est = pd.concat([d.dots for d in est.data_plot])
    df_est = df_est.rename(columns={'x': kwargs.get("x"), 'fit': kwargs.get("y")})

    # Add confidence intervals (width of the CI band per bin).
    if "ci" in kwargs:
        df_est = pd.merge(df_est, pd.concat([d.ci for d in est.data_plot]))
        df_est = df_est.drop(columns=['x'])
        df_est['ci'] = df_est['ci_r'] - df_est['ci_l']

    # Rename groups. Bug fix: the original read a global `data`, which
    # raises NameError on a fresh kernel — the DataFrame is in kwargs.
    if "by" in kwargs:
        df_est['group'] = df_est['group'].astype(
            kwargs.get("data")[kwargs.get("by")].dtype)
        df_est = df_est.rename(columns={'group': kwargs.get("by")})

    return df_est
"var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 5 }