From 1daebcab916b1cd16ef4489767a47ca14b91e042 Mon Sep 17 00:00:00 2001 From: tomsail Date: Thu, 8 Aug 2024 13:07:09 +0200 Subject: [PATCH] correction for Observations_IOC_STOFS and added IOC_in_STOFS --- IOC_in_STOFS.html | 9243 ++++++++++++++++++++++++++++++++++ IOC_in_STOFS.ipynb | 302 ++ Observations_IOC_STOFS.html | 249 +- Observations_IOC_STOFS.ipynb | 18 +- index.html | 1 + 5 files changed, 9698 insertions(+), 115 deletions(-) create mode 100644 IOC_in_STOFS.html create mode 100644 IOC_in_STOFS.ipynb diff --git a/IOC_in_STOFS.html b/IOC_in_STOFS.html new file mode 100644 index 0000000..ab4f378 --- /dev/null +++ b/IOC_in_STOFS.html @@ -0,0 +1,9243 @@ + + + + + +IOC_in_STOFS + + + + + + + + + + + + +
+ + + +
+ + diff --git a/IOC_in_STOFS.ipynb b/IOC_in_STOFS.ipynb new file mode 100644 index 0000000..721de2b --- /dev/null +++ b/IOC_in_STOFS.ipynb @@ -0,0 +1,302 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Find IOC stations in STOFS2D/3D output file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gp\n", + "import pandas as pd\n", + "import searvey\n", + "import hvplot.pandas\n", + "import hvplot.xarray\n", + "import numpy as np\n", + "import sklearn.neighbors" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "get stofs stations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mycols = [str(i) for i in range(6)] # we expect 17 cols max in that file\n", + "stof2d = pd.read_csv(\n", + " \"https://polar.ncep.noaa.gov/stofs/data/stofs_2d_glo_elev_stat_v2_1_0\",\n", + " names=mycols, \n", + " sep=\"\\t+|!\", \n", + " header=None, \n", + " skiprows=1\n", + ")\n", + "stof2d['Info'] = stof2d.apply(lambda row: ' '.join(filter(None, row[2:])), axis=1)\n", + "stof2d['ID'] = stof2d['Info'].apply(lambda x: ' '.join(x.split()[:3]))\n", + "stof2d['Info'] = stof2d.apply(lambda row: row['Info'].replace(row['ID'], '').strip(), axis=1)\n", + "stof2d = stof2d.drop(columns=[\"2\", \"3\", \"4\", \"5\"])\n", + "stof2d.rename(columns={\"0\": 'lon', \"1\": 'lat'}, inplace=True)\n", + "stof2d" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "get ioc stations" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "def ioc_subset_from_files_in_folder(\n", + " df: pd.DataFrame, folder: str, ext: str = \".parquet\"\n", + "):\n", + " \"\"\"this function return a subset of the ioc database from all the files (json or parquet)\n", + " present in a folder\n", + " \"\"\"\n", + " list_files = []\n", + " for file in os.listdir(folder):\n", + " name = file.split(ext)[0]\n", + " if file.endswith(ext):\n", + " list_files.append(name)\n", + " return df[df.ioc_code.isin(list_files)]\n", + "\n", + "def get_meta() -> gp.GeoDataFrame:\n", + " meta_web = searvey.get_ioc_stations().drop(columns=[\"lon\", \"lat\"])\n", + " meta_api = (\n", + " pd.read_json(\n", + " \"http://www.ioc-sealevelmonitoring.org/service.php?query=stationlist&showall=all\"\n", + " )\n", + " .drop_duplicates()\n", + " .drop(columns=[\"lon\", \"lat\"])\n", + " .rename(columns={\"Code\": \"ioc_code\", \"Lon\": \"lon\", \"Lat\": \"lat\"})\n", + " )\n", + " merged = pd.merge(\n", + " meta_web,\n", + " meta_api[[\"ioc_code\", \"lon\", \"lat\"]].drop_duplicates(),\n", + " on=[\"ioc_code\"],\n", + " )\n", + " return merged.drop(columns=[\"geometry\"])\n", + "\n", + "ioc_ = get_meta()\n", + "ioc_cleanup = ioc_subset_from_files_in_folder(ioc_, \"/home/tomsail/Documents/work/python/seareport_org/skill-panel/01_obs/surge\")\n", + "drop_index = ioc_cleanup.ioc_code.isin(['dapi', 'datu', 'djve', 'dkwa'])\n", + "ioc_cleanup = ioc_cleanup[~drop_index]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def find_nearest_nodes(\n", + " mesh_nodes: pd.DataFrame,\n", + " points: pd.DataFrame,\n", + " metric: str = \"haversine\",\n", + " earth_radius = 6371000,\n", + " ):\n", + " \"\"\"\n", + " Calculate the mesh nodes that are nearest to the specified `points`.\n", + " Both `mesh_nodes` and `points` must be `pandas.DataFrames` that have\n", + " columns named `lon` and `lat` and the coords must be in EPSG:4326.\n", + " Returns the `points` DataFrame after adding these extra columns:\n", + " - `mesh_index` which is the index of the node in the `hgrid.gr3` file\n", + " - `mesh_lon` which is the longitude of the nearest mesh node\n", + " - `mesh_lat` which is the latitude of the nearest mesh node\n", + " - `distance` which is the distance in meters between the point and the nearest mesh node\n", + " Examples:\n", + " >>> mesh_nodes = pd.DataFrame({\n", + " ... \"lon\": [0, 10, 20],\n", + " ... \"lat\": [0, 5, 0],\n", + " ... })\n", + " >>> points = pd.DataFrame({\n", + " ... \"lon\": [1, 11, 21],\n", + " ... \"lat\": [1, 4, 1],\n", + " ... \"id\": [\"a\", \"b\", \"c\"],\n", + " ... })\n", + " >>> nearest_nodes = find_nearest_nodes(mesh_nodes, points)\n", + " >>> nearest_nodes\n", + " lon lat id mesh_index mesh_lon mesh_lat distance\n", + " 0 1 1 a 0 0 0 157249.381272\n", + " 1 11 4 b 1 10 5 157010.162641\n", + " 2 21 1 c 2 20 0 157249.381272\n", + " \"\"\"\n", + " # The only requirement is that both `mesh_nodes and `points` have `lon/lat` columns\n", + " tree = sklearn.neighbors.BallTree(\n", + " np.radians(mesh_nodes[[\"lat\", \"lon\"]]),\n", + " metric=metric,\n", + " )\n", + " distances, indices = tree.query(np.radians(points[[\"lat\", \"lon\"]].values))\n", + " closest_nodes = (\n", + " mesh_nodes\n", + " .rename(columns={\"lon\": \"mesh_lon\", \"lat\": \"mesh_lat\"})\n", + " .iloc[indices.flatten()]\n", + " .assign(distance=(distances.flatten() * earth_radius))\n", + " .reset_index(names=[\"mesh_index\"])\n", + " )\n", + " return pd.concat((points, closest_nodes), axis=\"columns\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "nearest_nodes = find_nearest_nodes(stof2d, ioc_cleanup)\n", + "nearest_nodes = nearest_nodes[~nearest_nodes.mesh_index.isna()]\n", + "nearest_nodes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "keep_nodes = nearest_nodes[nearest_nodes.distance < 2000]\n", + "stof2d.iloc[keep_nodes.mesh_index]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plot1 = stof2d.hvplot.points(\n", + " x='lon', y='lat', \n", + " s = 50,\n", + " geo=True, \n", + " tiles = True, \n", + " hover_cols=[\"ID\"], \n", + " label = 'STOFS stations'\n", + ")\n", + "plot2 = stof2d.iloc[keep_nodes.mesh_index].hvplot.points(\n", + " x='lon', y='lat', \n", + " s = 50,\n", + " geo=True, \n", + " tiles = True, \n", + " hover_cols=[\"ID\"], \n", + " label = 'IOC stations'\n", + ")\n", + "plot3 = ioc_cleanup.hvplot.points(\n", + " x='lon', y='lat', \n", + " s = 10,\n", + " geo=True, \n", + " tiles = True, \n", + " hover_cols=[\"ioc_code\"], \n", + " label = 'concomittent IOC/STOFS stations'\n", + ")\n", + "(plot1 * plot2 * plot3).opts(\n", + " width=1400, height=800, title='STOFS2D/3D stations and IOC stations'\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "keep_nodes.mesh_index.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import xarray as xr\n", + "import glob\n", + "import dask\n", + "\n", + "filenames1 = []\n", + "filenames2 = []\n", + "for file in sorted(glob.glob('noaa/stofs2d/202*')):\n", + " root, filename = os.path.split(file)\n", + " number = int(filename.split(\"_\")[0])\n", + " if number > 20230107:\n", + " filenames2.append(file)\n", + " else: \n", + " filenames1.append(file)\n", + "\n", + "ds1 = xr.open_mfdataset(filenames1, combine='nested', concat_dim = \"time\")\n", + "ds2 = xr.open_mfdataset(filenames2, combine='nested', concat_dim = \"time\")\n", + "\n", + "def keep_first_timeindex(ds):\n", + " mask = ds.time.to_pandas().duplicated(\"last\").values\n", + " data_vars = []\n", + " for var in list(ds.data_vars):\n", + " with dask.config.set(**{\"array.slicing.split_large_chunks\": True}):\n", + " data_vars.append(ds[var][~mask])\n", + " return xr.merge(data_vars)\n", + "\n", + "\n", + "ds1 = keep_first_timeindex(ds1)\n", + "ds2 = keep_first_timeindex(ds2)\n", + "ds1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for it, tmp in keep_nodes.iterrows():\n", + " print(f'Station {tmp.ioc_code} at {tmp.lon:.2f}, {tmp.lat:.2f}')\n", + "\n", + " ds1_subset = ds1.isel(station = int(tmp.mesh_index))\n", + " ds2_subset = ds2.isel(station = int(tmp.mesh_index))\n", + " print(ds1_subset)\n", + " ds_subset = xr.concat([ds1_subset, ds2_subset], dim=\"time\")\n", + " ds_subset.zeta.hvplot(figsize=(10, 5))\n", + " df = ds_subset.to_pandas()\n", + " df.to_parquet(f'01_obs/stofs2d/{tmp.ioc_code}.parquet')\n", + " break\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pos_test", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Observations_IOC_STOFS.html b/Observations_IOC_STOFS.html index 101e5f6..cc99584 100644 --- a/Observations_IOC_STOFS.html +++ b/Observations_IOC_STOFS.html @@ -7532,9 +7532,9 @@