Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: add toy data #493

Merged
merged 25 commits into from
Jan 9, 2025
Merged
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
598d62f
ignore data except for .msk files
jsmariegaard Dec 26, 2024
a5c308b
data module
jsmariegaard Dec 26, 2024
5588cab
add vistula dataset
jsmariegaard Dec 26, 2024
0a7f203
ms.data module
jsmariegaard Dec 26, 2024
4114a2b
return 1 for now
jsmariegaard Dec 26, 2024
6a72d6e
test data module
jsmariegaard Dec 26, 2024
be7e3ab
load vistula data
jsmariegaard Dec 26, 2024
6d9d4ee
Add oresund dataset
jsmariegaard Dec 26, 2024
dc4b0f7
better way to find _DATA_ROOT
jsmariegaard Dec 26, 2024
744233e
Make sure data files are part of the package.
ecomodeller Jan 2, 2025
bdbef65
Whitelist include
ecomodeller Jan 2, 2025
ca02c72
float32, fewer stations and modeldata cropped to time span
jsmariegaard Jan 4, 2025
362cd5b
Add data to api docs
jsmariegaard Jan 4, 2025
32b63e6
Simple notebook to show the builtin datasets
jsmariegaard Jan 4, 2025
aed2ffe
Merge branch 'Feature-489-add-toy-data' of https://github.com/DHI/mod…
jsmariegaard Jan 7, 2025
f80c1a5
remove Path(__file__) way of finding data files
jsmariegaard Jan 7, 2025
d322d64
Merge branch 'main' of https://github.com/DHI/modelskill into Feature…
jsmariegaard Jan 7, 2025
91bf990
Reduced vistula dataset, now less than 1MB
jsmariegaard Jan 8, 2025
3a39cfe
Rerun with reduced datasets and add skill tables
jsmariegaard Jan 8, 2025
2975d57
Added attrs and aux data
jsmariegaard Jan 8, 2025
8021a8e
Added precipitation data to vistula and attrs
jsmariegaard Jan 8, 2025
de1c3f1
any many asserts
jsmariegaard Jan 8, 2025
b93a6cb
improved docstring
jsmariegaard Jan 8, 2025
472eeae
rerun notebook
jsmariegaard Jan 8, 2025
b82c6eb
Merge branch 'main' of https://github.com/DHI/modelskill into Feature…
jsmariegaard Jan 8, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -130,7 +130,9 @@ dmypy.json
# Pyre type checker
.pyre/

/data/
# ignore data except for .msk files
data/*
!data/*.msk
tests/testdata/tmp
/tmp/

3 changes: 3 additions & 0 deletions docs/_quarto.yml
Original file line number Diff line number Diff line change
@@ -125,6 +125,8 @@ website:
href: api/settings.qmd
- text: "Quantity"
href: api/Quantity.qmd
- text: "Data"
href: api/data.qmd


filters:
@@ -277,6 +279,7 @@ quartodoc:
- settings
- TimeSeries
- Quantity
- data

#metadata-files:
# - api/_sidebar.yml
2 changes: 2 additions & 0 deletions modelskill/__init__.py
Original file line number Diff line number Diff line change
@@ -38,6 +38,7 @@
from .configuration import from_config
from .settings import options, get_option, set_option, reset_option, load_style
from . import plotting
from . import data
from .comparison import ComparerCollection, Comparer
from .skill import SkillTable
from .timeseries import TimeSeries
@@ -89,4 +90,5 @@ def load(filename: Union[str, Path]) -> ComparerCollection:
"load_style",
"plotting",
"from_config",
"data",
]
53 changes: 53 additions & 0 deletions modelskill/data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""
Toy datasets for testing and demonstration purposes

Examples
--------
```{python}
>>> import modelskill as ms
>>> cc = ms.data.vistula()
>>> cc
```
```{python}
>>> cc = ms.data.oresund()
>>> cc
```
"""

from importlib.resources import as_file, files

import modelskill as ms
from ..comparison import ComparerCollection


def vistula() -> ComparerCollection:
    """5-year daily discharge data for Vistula catchment, Poland

    Contains discharge data for 8 stations along the Vistula river
    compared with two hydrological models "sim1" and "sim2".

    The dataset additionally contains precipitation data as aux data
    and metadata about the river and the catchment area in the attrs dictionary.

    Returns
    -------
    ComparerCollection
        Collection loaded from the packaged ``vistula.msk`` file.
    """
    resource = files("modelskill.data") / "vistula.msk"
    # str() of a Traversable is only a usable path when the package lives on
    # the filesystem; as_file() yields a real path in every case (extracting
    # to a temporary file if the package is imported from an archive).
    # NOTE(review): assumes ms.load finishes reading before the context exits.
    with as_file(resource) as path:
        return ms.load(str(path))


def oresund() -> ComparerCollection:
    """Oresund water level data for Jan-June 2022 compared with MIKE21 model

    Contains water level data for 7 stations along the Oresund strait with
    metadata about the country in the attrs dictionary.

    The dataset contains additional ERA5 wind-components U10 and V10 aux data.

    Returns
    -------
    ComparerCollection
        Collection loaded from the packaged ``oresund.msk`` file.
    """
    resource = files("modelskill.data") / "oresund.msk"
    # str() of a Traversable is only a usable path when the package lives on
    # the filesystem; as_file() yields a real path in every case (extracting
    # to a temporary file if the package is imported from an archive).
    # NOTE(review): assumes ms.load finishes reading before the context exits.
    with as_file(resource) as path:
        return ms.load(str(path))
Binary file added modelskill/data/oresund.msk
Binary file not shown.
Binary file added modelskill/data/vistula.msk
Binary file not shown.
760 changes: 760 additions & 0 deletions notebooks/Builtin_datasets.ipynb

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -2,8 +2,10 @@
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.sdist]
exclude = ["notebooks", "tests"]
[tool.hatch.build]
include = [
"modelskill/**/*",
]

[tool.hatch.metadata]
allow-direct-references = true
52 changes: 52 additions & 0 deletions tests/test_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import os

import pytest
import modelskill as ms


@pytest.fixture
def change_test_directory(tmp_path):
    """Run the requesting test with ``tmp_path`` as the working directory.

    The working directory that was active before the test is restored
    during fixture teardown.
    """
    previous_cwd = os.getcwd()
    os.chdir(tmp_path)
    # Hand the temporary directory to the test; the line below runs at teardown.
    yield tmp_path
    os.chdir(previous_cwd)


def test_load_vistula(change_test_directory):
    """The packaged Vistula dataset loads from an unrelated working directory.

    Checks collection size, model names, quantity, aux data, attrs and the
    raw model data of the loaded ComparerCollection.
    """
    cc = ms.data.vistula()
    assert isinstance(cc, ms.ComparerCollection)
    assert len(cc) == 8
    assert cc.mod_names == ["sim1", "sim2"]
    assert cc[0].name == "Tczew"
    assert cc[-1].n_points == 1827
    # Floats are compared approximately, matching the Oresund test, so the
    # check does not depend on the stored floating-point precision.
    assert cc[-1].y == pytest.approx(52.94889)

    assert cc[0].quantity.name == "Discharge"
    assert cc[0].quantity.unit == "m3/s"

    assert cc.aux_names == ["Precipitation"]
    assert float(cc[1].data.Precipitation[0]) == pytest.approx(1.18)
    assert cc[0].attrs["River"] == "Vistula"
    assert cc[0].attrs["Area"] == pytest.approx(193922.9)

    assert cc[1].raw_mod_data["sim2"].n_points == 1827
    assert isinstance(cc[0].raw_mod_data["sim1"], ms.PointModelResult)


def test_load_oresund(change_test_directory):
    """The packaged Oresund dataset loads from an unrelated working directory.

    Checks collection size, model names, quantity, aux data, attrs and the
    raw model data of the loaded ComparerCollection.
    """
    cc = ms.data.oresund()
    assert isinstance(cc, ms.ComparerCollection)
    assert len(cc) == 7
    assert cc.mod_names == ["MIKE21"]

    first_cmp = cc[0]
    last_cmp = cc[-1]

    # Station identity and size
    assert last_cmp.name == "Vedbaek"
    assert first_cmp.n_points == 8422
    assert first_cmp.x == pytest.approx(12.7117)

    # Observed quantity
    quantity = first_cmp.quantity
    assert quantity.name == "Surface Elevation"
    assert quantity.unit == "meter"

    # Auxiliary variables and metadata
    assert cc.aux_names == ["U10", "V10"]
    assert last_cmp.attrs["Country"] == "DK"

    # Raw (unmatched) model data is kept alongside the matched data
    assert cc[1].raw_mod_data["MIKE21"].n_points == 4344
    assert isinstance(first_cmp.raw_mod_data["MIKE21"], ms.PointModelResult)