Skip to content

Commit

Permalink
[tide] documented, corrected detide() and added resampling in tide_an…
Browse files Browse the repository at this point in the history
…alysis() + remove unused function
  • Loading branch information
tomsail committed Mar 9, 2024
1 parent 0f3e54c commit 524854b
Show file tree
Hide file tree
Showing 24 changed files with 288,685 additions and 6,040 deletions.
60 changes: 60 additions & 0 deletions .github/workflows/run_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# CI workflow: runs the test suite on pushes to the main development branches
# and on pull requests, but only when Python sources, this workflow file, or
# the dependency manifests change.
name: test

on:
  push:
    branches:
      - "main"
      - "master"
      - "dev"
    paths:
      - "**.py"
      - ".github/workflows/*test*.yml"
      - "pyproject.toml"
      - "poetry.lock"
      - "requirements/requirements*.txt"
  pull_request:
    paths:
      - "**.py"
      - ".github/workflows/*test*.yml"
      - "pyproject.toml"
      - "poetry.lock"
      - "requirements/requirements*.txt"

jobs:
  test:
    name: "test Python ${{ matrix.python }} on ${{ matrix.os }}"
    runs-on: "${{ matrix.os }}"
    strategy:
      fail-fast: false
      matrix:
        os: ["ubuntu-latest"]
        python: ["3.9", "3.10", "3.11"]
        include:
          # The key must be `python`: that is the variable the job name and
          # the setup-python step read. With `python-version` here the macOS
          # job ran with an empty `matrix.python`.
          - os: "macos-latest"
            python: "3.10"
    defaults:
      run:
        shell: "bash -eo pipefail {0}"

    steps:
      - uses: "actions/checkout@main"
      - uses: "actions/setup-python@main"
        with:
          python-version: "${{ matrix.python }}"
      - uses: "actions/cache@main"
        id: "cache"
        with:
          path: "${{ env.pythonLocation }}"
          key: "test-${{ runner.os }}-${{ env.pythonLocation }}-${{ hashFiles('pyproject.toml', 'requirements/*') }}"
      - run: "python --version"
      - run: "python -mpip install -U pip"
      - run: "python -mpip --version"
      - run: "python -mpip install -r requirements/requirements.txt"
      # `name` and `run` belong to the same step; a step that only has a
      # `name` (no `run`/`uses`) is rejected by the workflow parser.
      - name: "Install requirements"
        run: "python -mpip install ./"
      - run: "python -mpip cache info"
      - run: "python -mpip freeze"
      - name: "Run tests"
        run: "pytest tests/"
        env:
          PYTHONPATH: ${{ github.workspace }}/tests
12 changes: 0 additions & 12 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,18 +80,6 @@ repos:
"-o",
"requirements/requirements.txt",
]
- id: "poetry-export"
name: "poetry export dev"
args:
[
"--with",
"dev",
"-f",
"requirements.txt",
"-o",
"requirements/requirements-dev.txt",
]

- repo: "local"
hooks:
- id: "mypy"
Expand Down
17 changes: 0 additions & 17 deletions analysea/steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
import numpy.typing as npt
import pandas as pd
import ruptures as rpt
from scipy import interpolate
from skimage.restoration import denoise_tv_chambolle

from analysea.utils import detect_time_step

Expand Down Expand Up @@ -69,18 +67,3 @@ def remove_steps_simple(df: pd.DataFrame, threshold: float) -> Tuple[pd.DataFram
step = df.interpolate().iloc[stepx : steps_ix[i + 1]].mean()
step_function.iloc[stepx : steps_ix[i + 1]] = step
return step_function, steps_ix


def step_function_TV(df: pd.DataFrame, weight: float = 1) -> Tuple[pd.DataFrame, npt.NDArray[Any]]:
    """
    Build a step function from a total-variation-denoised copy of the signal.

    The gap-interpolated signal is sub-sampled (one point every 200 samples),
    denoised with ``denoise_tv_chambolle``, interpolated back onto every
    sample position and finally handed to ``remove_steps_simple`` with the
    standard deviation of the smoothed signal as threshold.

    @param df: input signal
    @param weight: weight forwarded to ``denoise_tv_chambolle``
    @return: whatever ``remove_steps_simple`` returns for the smoothed signal
    """
    n_samples = len(df)
    coarse_ix = range(0, n_samples, 200)
    coarse_signal = np.array(df.interpolate())[coarse_ix]
    smoothed = denoise_tv_chambolle(coarse_signal, weight=weight)
    # "extrapolate" is needed for the positions past the last sub-sampled point
    to_full_grid = interpolate.interp1d(coarse_ix, smoothed, fill_value="extrapolate")
    dense = pd.Series(data=to_full_grid(range(n_samples)), index=df.index)
    return remove_steps_simple(dense, dense.std())
104 changes: 94 additions & 10 deletions analysea/tide.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,26 @@ def calc_constituents(
lat: float = 0.0,
**kwargs: Dict[str, Any] | None,
) -> Any:
"""
Calculate the tide constituents for a time series.
Parameters
----------
ts : pd.Series
The time series to analyze.
resample_time : int, optional
The resample time in minutes, by default 30.
lat : float, optional
The latitude of the station, by default 0.0.
**kwargs : Dict[str, Any], optional
Additional keyword arguments to pass to the utide.solve function.
Returns
-------
np.ndarray
The tide constituents.
"""
# resample to 30 min for a MUCH faster analysis
# https://github.com/wesleybowman/UTide/issues/103
h_rsmp = ts.resample(f"{resample_time}min").mean()
Expand Down Expand Up @@ -134,9 +154,10 @@ def detide(
@param lat: latitude of the station
@param resample_time: resample time in minutes
@param split_period: period in days to split the time series into (default 365)
@param kwargs: keyword arguments to be passed to utide.reconstruct
@param kwargs: keyword arguments to be passed to calc constituents
@return: reconstructed time series
"""
verbose = kwargs.get("verbose", False)
result_series = []
if constituents is None:
constituents = calc_constituents(ts=ts, resample_time=resample_time, lat=lat, **kwargs)
Expand All @@ -145,36 +166,99 @@ def detide(
df = h_rsmp.shift(freq=f"{resample_time / 2}min")

for start in range(0, len(df), chunk):
end = min(start + chunk, len(ts))
ts_chunk = ts.iloc[start:end]
end = min(start + chunk, len(df))
ts_chunk = df.iloc[start:end]

if not ts_chunk.empty:
tidal = utide.reconstruct(ts_chunk.index, nd_format(constituents), verbose=kwargs["verbose"])
tidal = utide.reconstruct(ts_chunk.index, nd_format(constituents), verbose=verbose)
storm_surge = ts_chunk - tidal.h
result_series.append(storm_surge)
return pd.concat(result_series)


def tide_analysis(
    ts: pd.Series[float],
    resample_time: int = 30,
    lat: float = 0.0,
    **kwargs: Dict[str, Any] | None,
) -> Tuple[pd.Series, pd.Series, npt.NDArray[Any]]:
    """
    Perform a tide analysis on a time series.

    The constituents are computed with ``calc_constituents``; the tide is then
    reconstructed on the resampled time axis (bins of ``resample_time``
    minutes, labelled at the bin centre) and subtracted from the resampled
    signal to obtain the surge.

    Parameters
    ----------
    ts : pd.Series
        The time series to analyze.
    resample_time : int, optional
        The resample time in minutes, by default 30.
    lat : float, optional
        The latitude of the station, by default 0.0.
    **kwargs : Dict[str, Any], optional
        Additional keyword arguments forwarded to ``calc_constituents``.
        ``verbose`` (default False) is also passed to ``utide.reconstruct``.

    Returns
    -------
    Tuple[pd.Series, pd.Series, npt.NDArray[Any]]
        A tuple containing the following elements:
        * tide : pd.Series
            The reconstructed tide, indexed on the resampled time axis.
        * surge : pd.Series
            The resampled signal minus the tide, on the same index.
        * constituents : npt.NDArray[Any]
            The constituents used in the analysis.
    """
    verbose = kwargs.get("verbose", False)
    constituents = calc_constituents(ts=ts, lat=lat, resample_time=resample_time, **kwargs)

    # Same binning as calc_constituents (`.mean()` skips NaN and yields NaN for
    # empty bins without emitting "mean of empty slice" warnings).
    h_rsmp = ts.resample(f"{resample_time}min").mean()
    # label each bin with its centre instead of its left edge
    df = h_rsmp.shift(freq=f"{resample_time / 2}min")

    tidal = utide.reconstruct(df.index, constituents, verbose=verbose)
    tide = pd.Series(data=tidal.h, index=df.index)
    surge = pd.Series(data=df.values - tidal.h, index=df.index)
    return tide, surge, constituents


def yearly_tide_analysis(
h: pd.Series[float],
resample_time: int = 10,
resample_time: int = 30,
split_period: int = 365,
lat: int = 0,
**kwargs: Dict[str, Any] | None,
) -> Tuple[pd.DataFrame, pd.DataFrame, List[npt.NDArray[Any]], List[int]]:
"""
Perform a tide analysis on a time series, split into yearly intervals.
Parameters
----------
h : pd.Series
The time series to analyze.
resample_time : int, optional
The resample time in minutes, by default 30.
split_period : int, optional
The period in days to split the time series into, by default 365.
lat : int, optional
The latitude of the station, by default 0.
**kwargs : Dict[str, Any], optional
Additional keyword arguments to pass to the utide.solve function.
Returns
-------
Tuple[pd.DataFrame, pd.DataFrame, List[npt.NDArray[Any]], List[int]]
A tuple containing the following elements:
* tide : pd.DataFrame
A dataframe containing the tide values.
* surge : pd.DataFrame
A dataframe containing the surge values.
* coefs : List[npt.NDArray[Any]]
The constituents used in the analysis for each year.
* years : List[int]
The years analyzed.
"""
log = kwargs.get("verbose", False)

min_time = pd.Timestamp(h.index.min())
Expand Down
66 changes: 65 additions & 1 deletion analysea/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Any
from typing import cast
from typing import Dict
from typing import Iterator
from typing import Optional
from typing import Tuple
from typing import Union
Expand All @@ -20,6 +21,41 @@
# ===================
# TIME SERIES
# ===================
def resample(df: pd.DataFrame, t_rsp: int = 30) -> pd.DataFrame:
    """
    Resample a pandas dataframe to a new time interval.

    Values are averaged over consecutive bins of ``t_rsp`` minutes and the
    resulting timestamps are shifted by half a bin, so each value is labelled
    with the centre of its bin instead of its left edge.

    @param df (pd.DataFrame): The input dataframe.
    @param t_rsp (int): optional, The target resample period in minutes
        by default 30.

    @returns (pd.DataFrame): The resampled dataframe.
    """
    # Shift by an exact half period: truncating t_rsp / 2 to whole minutes
    # mis-centres the bins for odd periods (7 min instead of 7.5 min for 15).
    half_bin = pd.Timedelta(minutes=t_rsp) / 2
    return df.resample(f"{t_rsp}min").mean().shift(freq=half_bin)


def interpolate(df: pd.DataFrame, t_rsp: int = 30) -> pd.DataFrame:
    """
    Fill missing values of a dataframe by linear interpolation, limited to
    short gaps.

    The sampling step is obtained from ``detect_time_step``; ``t_rsp`` minutes
    are converted into a number of such steps, and that number is used as the
    ``limit`` of ``DataFrame.interpolate`` (the maximum count of consecutive
    NaN values that get filled). The index itself is left unchanged.

    @param df (pd.DataFrame): The input dataframe.
    @param t_rsp (int): optional, The gap length, in minutes, up to which
        missing values are filled, by default 30.

    @returns (pd.DataFrame): The interpolated dataframe.
    """
    step_seconds = detect_time_step(df).total_seconds()
    # number of samples that fit in t_rsp minutes
    max_fill = int(t_rsp * 60 / step_seconds)
    return df.interpolate(method="linear", limit=max_fill)


def detect_splits(sr: pd.Series, max_gap: pd.Timedelta) -> pd.DatetimeIndex:
split_points = pd.DatetimeIndex([sr.index[0], sr.index[-1]])
condition = sr.index.to_series().diff() > max_gap
Expand All @@ -28,7 +64,15 @@ def detect_splits(sr: pd.Series, max_gap: pd.Timedelta) -> pd.DatetimeIndex:
return split_points


def split_series(sr: pd.Series, max_gap: pd.Timedelta = pd.Timedelta(hours=24)) -> Iterator[pd.Series]:
    """
    Split a pandas series into consecutive segments at the points returned by
    ``detect_splits`` (gaps between timestamps larger than ``max_gap``).

    @param sr (pd.Series): The input series.
    @param max_gap (pd.Timedelta): The maximum allowed gap between two
        consecutive timestamps inside one segment.

    @returns: Iterator[pd.Series]: An iterator of segments.
    """
    split_points = detect_splits(sr=sr, max_gap=max_gap)
    # Pair each split point with the next one. Equivalent to
    # itertools.pairwise, which only exists on Python >= 3.10 while the
    # test matrix still covers 3.9.
    for start, stop in zip(split_points[:-1], split_points[1:]):
        segment = sr[start:stop]
        # Label slicing includes `stop`, which opens the next segment, so the
        # last row is dropped here.
        # NOTE(review): this also drops the final sample of the last segment
        # if the last split point is the end of the series - confirm intended.
        yield segment[:-1]
Expand Down Expand Up @@ -69,6 +113,26 @@ def cleanup(
despike: bool = True,
demean: bool = True,
) -> pd.DataFrame:
"""
This function cleans up a time series by removing outliers,
detecting and removing flat areas, and removing steps.
@param ts (pd.Series): The input time series.
@param clip_limits tuple[float, float]: Optional, The lower and upper
bounds for outlier detection. If None, outlier detection is not performed.
@param kurtosis (float): The threshold for detecting outliers. If the absolute
value of the kurtosis of a segment is less than this value, the segment is
considered clean.
@param remove_flats (bool): Whether to remove flat areas from the time series.
If True, flat areas are detected by comparing the difference in consecutive values.
@param despike (bool): Whether to remove outliers using the provided clip_limits.
If True and clip_limits is not None, outlier detection is performed.
@param demean (bool): Whether to demean the time series.
If True, the mean of the time series is subtracted from each value.
@returns (pd.DataFrame): The cleaned up time series.
"""
# Check if the input is empty
if ts.empty:
return pd.DataFrame()
Expand Down
Loading

0 comments on commit 524854b

Please sign in to comment.