Skip to content

Commit

Permalink
added tests, CI and resample/interp functions
Browse files Browse the repository at this point in the history
  • Loading branch information
tomsail committed Mar 8, 2024
1 parent e4f3817 commit dc86a13
Show file tree
Hide file tree
Showing 22 changed files with 287,671 additions and 5,207 deletions.
60 changes: 60 additions & 0 deletions .github/workflows/run_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
name: test

on:
  push:
    branches:
      - "main"
      - "master"
      - "dev"
    paths:
      - "**.py"
      - ".github/workflows/*test*.yml"
      - "pyproject.toml"
      - "poetry.lock"
      - "requirements/requirements*.txt"
  pull_request:
    paths:
      - "**.py"
      - ".github/workflows/*test*.yml"
      - "pyproject.toml"
      - "poetry.lock"
      - "requirements/requirements*.txt"

jobs:
  test:
    name: "test Python ${{ matrix.python }} on ${{ matrix.os }}"
    runs-on: "${{ matrix.os }}"
    strategy:
      fail-fast: false
      matrix:
        os: ["ubuntu-latest"]
        python: ["3.9", "3.10", "3.11"]
        include:
          # The include key must match the matrix variable name ("python",
          # not "python-version"); otherwise the macOS job would interpolate
          # an empty ${{ matrix.python }} in the job name and setup-python.
          - os: "macos-latest"
            python: "3.10"
    defaults:
      run:
        shell: "bash -eo pipefail {0}"

    steps:
      - uses: "actions/checkout@main"
      - uses: "actions/setup-python@main"
        with:
          python-version: "${{ matrix.python }}"
      - uses: "actions/cache@main"
        id: "cache"
        with:
          path: "${{ env.pythonLocation }}"
          key: "test-${{ runner.os }}-${{ env.pythonLocation }}-${{ hashFiles('pyproject.toml', 'requirements/*') }}"
      - run: "python --version"
      - run: "python -mpip install -U pip"
      - run: "python -mpip --version"
      - run: "python -mpip install -r requirements/requirements.txt"
      # A step item must carry "run" (or "uses") alongside "name"; a bare
      # "- name:" entry is an invalid workflow step, so name and command are
      # merged into a single step here.
      - name: "Install requirements"
        run: "python -mpip install ./"
      - run: "python -mpip cache info"
      - run: "python -mpip freeze"
      - name: "Run tests"
        run: "pytest tests/"
        env:
          PYTHONPATH: ${{ github.workspace }}/tests
12 changes: 0 additions & 12 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,18 +80,6 @@ repos:
"-o",
"requirements/requirements.txt",
]
- id: "poetry-export"
name: "poetry export dev"
args:
[
"--with",
"dev",
"-f",
"requirements.txt",
"-o",
"requirements/requirements-dev.txt",
]

- repo: "local"
hooks:
- id: "mypy"
Expand Down
13 changes: 7 additions & 6 deletions analysea/tide.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,10 @@ def detide(
@param lat: latitude of the station
@param resample_time: resample time in minutes
@param split_period: period in days to split the time series into (default 365)
@param kwargs: keyword arguments to be passed to utide.reconstruct
@param kwargs: keyword arguments to be passed to calc constituents
@return: reconstructed time series
"""
verbose = kwargs.get("verbose", False)
result_series = []
if constituents is None:
constituents = calc_constituents(ts=ts, resample_time=resample_time, lat=lat, **kwargs)
Expand All @@ -169,7 +170,7 @@ def detide(
ts_chunk = df.iloc[start:end]

if not ts_chunk.empty:
tidal = utide.reconstruct(ts_chunk.index, nd_format(constituents), verbose=kwargs["verbose"])
tidal = utide.reconstruct(ts_chunk.index, nd_format(constituents), verbose=verbose)
storm_surge = ts_chunk - tidal.h
result_series.append(storm_surge)
return pd.concat(result_series)
Expand All @@ -180,7 +181,7 @@ def tide_analysis(
resample_time: int = 30,
lat: float = 0.0,
**kwargs: Dict[str, Any] | None,
) -> Tuple[pd.DataFrame, pd.DataFrame, npt.NDArray[Any]]:
) -> Tuple[pd.Series, pd.Series, npt.NDArray[Any]]:
"""
Perform a tide analysis on a time series.
Expand Down Expand Up @@ -208,15 +209,15 @@ def tide_analysis(
The constituents used in the analysis.
"""
verbose = kwargs.get("verbose", False)
constituents = calc_constituents(ts=ts, lat=lat, resample_time=resample_time, **kwargs)

h_rsmp = ts.resample(f"{resample_time}min").apply(np.nanmean)
df = h_rsmp.shift(freq=f"{resample_time / 2}min")

tidal = utide.reconstruct(df.index, constituents, **kwargs)
tidal = utide.reconstruct(df.index, constituents, verbose=verbose)
tide = pd.Series(data=tidal.h, index=df.index)
surge = pd.Series(data=df.iloc[0, :].values - tidal.h, index=df.index)

surge = pd.Series(data=df.values - tidal.h, index=df.index)
return tide, surge, constituents


Expand Down
66 changes: 65 additions & 1 deletion analysea/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Any
from typing import cast
from typing import Dict
from typing import Iterator
from typing import Optional
from typing import Tuple
from typing import Union
Expand All @@ -20,6 +21,41 @@
# ===================
# TIME SERIES
# ===================
def resample(df: pd.DataFrame, t_rsp: int = 30) -> pd.DataFrame:
    """
    Resample a pandas dataframe onto a coarser regular time grid.

    Values are averaged over each ``t_rsp``-minute bin, and the resulting
    index is shifted by half a bin so that each timestamp sits at the
    centre of the interval it summarises.

    @param df (pd.DataFrame): The input dataframe (datetime-indexed).
    @param t_rsp (int): optional, the target resample period in minutes,
        by default 30.
    @returns (pd.DataFrame): The resampled dataframe.
    """
    binned = df.resample(f"{t_rsp}min").mean()
    half_bin = f"{int(t_rsp / 2)}min"
    return binned.shift(freq=half_bin)


def interpolate(df: pd.DataFrame, t_rsp: int = 30) -> pd.DataFrame:
    """
    Fill gaps in a dataframe by linear interpolation, up to the target period.

    The native sampling step is obtained from analysea's ``detect_time_step``
    and used to cap the number of consecutive missing samples that may be
    filled, so only genuine holes are interpolated and long "flat areas" of
    missing data in the signal are not extrapolated.

    @param df (pd.DataFrame): The input dataframe.
    @param t_rsp (int): optional, the target resample period in minutes,
        by default 30.
    @returns (pd.DataFrame): The interpolated dataframe.
    """
    step = detect_time_step(df)
    # Maximum run of NaNs to fill: target period expressed in native steps.
    max_fill = int(t_rsp * 60 / step.total_seconds())
    return df.interpolate(method="linear", limit=max_fill)


def detect_splits(sr: pd.Series, max_gap: pd.Timedelta) -> pd.DatetimeIndex:
split_points = pd.DatetimeIndex([sr.index[0], sr.index[-1]])
condition = sr.index.to_series().diff() > max_gap
Expand All @@ -28,7 +64,15 @@ def detect_splits(sr: pd.Series, max_gap: pd.Timedelta) -> pd.DatetimeIndex:
return split_points


def split_series(sr: pd.Series, max_gap: pd.Timedelta = pd.Timedelta(hours=24)) -> pd.Series:
def split_series(sr: pd.Series, max_gap: pd.Timedelta = pd.Timedelta(hours=24)) -> Iterator[pd.Series]:
    """
    Split a series into contiguous segments at gaps larger than max_gap.

    @param sr (pd.Series): The input series (datetime-indexed).
    @param max_gap (pd.Timedelta): The maximum allowed gap between two
        consecutive samples within a segment.
    @returns: Iterator[pd.Series]: An iterator over the gap-free segments.
    """
    boundaries = detect_splits(sr=sr, max_gap=max_gap)
    for left, right in itertools.pairwise(boundaries):
        # Drop the final sample so adjacent segments never share a boundary point.
        yield sr[left:right][:-1]
Expand Down Expand Up @@ -69,6 +113,26 @@ def cleanup(
despike: bool = True,
demean: bool = True,
) -> pd.DataFrame:
"""
This function cleans up a time series by removing outliers,
detecting and removing flat areas, and removing steps.
@param ts (pd.Series): The input time series.
@param clip_limits tuple[float, float]: Optional, The lower and upper
bounds for outlier detection. If None, outlier detection is not performed.
@param kurtosis (float): The threshold for detecting outliers. If the absolute
value of the kurtosis of a segment is less than this value, the segment is
considered clean.
@param remove_flats (bool): Whether to remove flat areas from the time series.
If True, flat areas are detected by comparing the difference in consecutive values.
@param despike (bool): Whether to remove outliers using the provided clip_limits.
If True and clip_limits is not None, outlier detection is performed.
@param demean (bool): Whether to demean the time series.
If True, the mean of the time series is subtracted from each value.
@returns (pd.DataFrame): The cleaned up time series.
"""
# Check if the input is empty
if ts.empty:
return pd.DataFrame()
Expand Down
Loading

0 comments on commit dc86a13

Please sign in to comment.