diff --git a/.gitignore b/.gitignore
index 8ee614c..903ece6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -178,3 +178,8 @@ bar
 test.py
 cutout.png
 *.out
+
+_build/
+?
+?.*
+~*
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d3d0cd6..48ed5f5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -37,7 +37,7 @@ repos:
     - --line-length=120
     - --fix
     - --exit-non-zero-on-fix
-    - --preview 
+    - --preview
     - --exclude
     - 'dev/*.py'
 #- repo: https://github.com/pre-commit/mirrors-mypy
@@ -46,3 +46,17 @@ repos:
 #  -   id: mypy
 #      verbose: true
 #      entry: bash -c 'mypy "$@" || true' --
+- repo: https://github.com/dzhu/rstfmt
+  rev: v0.0.14
+  hooks:
+    - id: rstfmt
+
+# - repo: https://github.com/rstcheck/rstcheck
+#   rev: v6.2.0
+#   hooks:
+#     - id: rstcheck
+#       args:
+#         - '--ignore-roles'
+#         - 'doc'
+#         - '--ignore-directives'
+#         - 'toctree'
diff --git a/docs/_static/logo.png b/docs/_static/logo.png
new file mode 100644
index 0000000..f78572e
Binary files /dev/null and b/docs/_static/logo.png differ
diff --git a/docs/_static/style.css b/docs/_static/style.css
new file mode 100644
index 0000000..9a2b3af
--- /dev/null
+++ b/docs/_static/style.css
@@ -0,0 +1,48 @@
+.wy-side-nav-search {
+    background-color: #f7f7f7;
+}
+
+/* There is a clash between the xarray notebook styles and Read the Docs */
+
+.rst-content dl.xr-attrs dt {
+    all: revert;
+    font-size: 95%;
+    white-space: nowrap;
+}
+
+.rst-content dl.xr-attrs dd {
+    font-size: 95%;
+}
+
+.xr-wrap {
+    font-size: 85%;
+}
+
+.wy-table-responsive table td, .wy-table-responsive table th {
+    white-space: inherit;
+}
+
+/*
+.wy-table-responsive table td,
+.wy-table-responsive table th {
+    white-space: normal !important;
+    vertical-align: top !important;
+}
+
+.wy-table-responsive {
+    margin-bottom: 24px;
+    max-width: 100%;
+    overflow: visible;
+} */
+
+/* Hide notebooks warnings */
+.nboutput .stderr {
+    display: none;
+}
+
+/*
+Set logo size
+*/
+.wy-side-nav-search .wy-dropdown > a img.logo, .wy-side-nav-search > a img.logo {
+    width: 200px;
+}
diff --git a/docs/_templates/.gitkeep b/docs/_templates/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/docs/apply-fmt.sh b/docs/apply-fmt.sh
new file mode 100755
index 0000000..762a8e3
--- /dev/null
+++ b/docs/apply-fmt.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# Format every reStructuredText file below the current directory.
+# find hands each path to rstfmt as a single argument, so file names
+# containing spaces or glob characters are not split by the shell.
+find . -name '*.rst' -exec rstfmt {} \;
diff --git a/docs/check-index.sh b/docs/check-index.sh
new file mode 100755
index 0000000..5bd2e4f
--- /dev/null
+++ b/docs/check-index.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# See https://github.com/vscode-restructuredtext/vscode-restructuredtext/issues/280
+# List each document (path without "./" and ".rst") with no :doc: link in index.rst.
+find . -name '*.rst' | sed -e 's,^\./,,' -e 's/\.rst$//' | while IFS= read -r m
+do
+   grep -qF ":doc:\`$m\`" index.rst || echo "$m"
+done
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..8889135
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,81 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+# import os
+# import sys
+# sys.path.insert(0, os.path.abspath('.'))
+
+import datetime
+
+# top = os.path.realpath(os.path.dirname(os.path.dirname(__file__)))
+# sys.path.insert(0, top)
+
+
+source_suffix = ".rst"
+master_doc = "index"
+pygments_style = "sphinx"
+html_theme_options = {"logo_only": True}
+html_logo = "_static/logo.png"
+
+
+# -- Project information -----------------------------------------------------
+
+project = "Anemoi"
+
+author = "ECMWF"
+
+# Copyright span: "2024" in the first year, "2024-<current year>" afterwards.
+year = datetime.datetime.now().year
+years = "2024" if year == 2024 else f"2024-{year}"
+
+# Sphinx reads the module-level name ``copyright``, so shadowing the builtin
+# of the same name is intentional here.
+copyright = f"{years}, ECMWF"
+
+
+release = "0.1.0"
+
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    "sphinx_rtd_theme",
+    "nbsphinx",
+]
+
+# Add any paths that contain templates here, relative to this directory.
+# templates_path = ["_templates"]
+
+# Glob patterns, relative to the source directory, of files and directories
+# to skip when looking for sources (also applies to html_static_path and
+# html_extra_path). Patterns are matched literally: no quotes inside them.
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "**.ipynb_checkpoints"]
+
+
+# https://www.notion.so/Deepnote-Launch-Buttons-63c642a5e875463495ed2341e83a4b2a
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = "sphinx_rtd_theme"
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ["_static"]
+html_css_files = ["style.css"]
diff --git a/docs/datasets/about.rst b/docs/datasets/about.rst
new file mode 100644
index 0000000..f0b6f57
--- /dev/null
+++ b/docs/datasets/about.rst
@@ -0,0 +1,27 @@
+##################
+ Training dataset
+##################
+
+Training datasets are large array-like objects encoded in Zarr_
+format.
+
+The array has the following dimensions:
+
+.. figure:: data.png
+   :alt: Data layout
+
+The first dimension is the time dimension, the second dimension holds
+the variables (e.g. temperature, pressure, etc.), the third dimension is
+the ensemble, and the fourth dimension holds the grid point values.
+
+This structure provides an efficient way to build the training dataset,
+as input and output of the model are simply consecutive slices of the
+array.
+
+.. code:: python
+
+   x, y  = ds[n], ds[n+1]
+   y_hat = model.predict(x)
+   loss  = model.loss(y, y_hat)
+
+.. _zarr: https://zarr.readthedocs.io/
diff --git a/docs/datasets/build.png b/docs/datasets/build.png
new file mode 100644
index 0000000..1dbe5ff
Binary files /dev/null and b/docs/datasets/build.png differ
diff --git a/docs/datasets/building.rst b/docs/datasets/building.rst
new file mode 100644
index 0000000..1bd8ecc
--- /dev/null
+++ b/docs/datasets/building.rst
@@ -0,0 +1,80 @@
+.. _datasets-building:
+
+###################
+ Building datasets
+###################
+
+..
+   .. figure:: build.png
+
+..
+   :alt: Building datasets
+
+..
+   :scale: 50%
+
+**********
+ Concepts
+**********
+
+date
+   Throughout this document, the term `date` refers to a date and time,
+   not just a date. A training dataset covers a continuous range of
+   dates with a given frequency. Missing dates are still part of the
+   dataset, but the data are missing and marked as such using NaNs.
+   Dates are always in UTC, and refer to the date at which the data is
+   valid. For accumulations and fluxes, that would be the end of the
+   accumulation period.
+
+variable
+   A `variable` is a meteorological parameter, such as temperature,
+   wind, etc. Multilevel parameters are treated as separate variables,
+   one for each level. For example, temperature at 850 hPa and
+   temperature at 500 hPa will be treated as two separate variables
+   (`t_850` and `t_500`).
+
+field
+   A `field` is a variable at a given date. It is represented by an
+   array of values at each grid point.
+
+source
+   The `source` is a software component that, given a list of dates and
+   variables, will return the corresponding fields. Examples of sources
+   are ECMWF's MARS archive, a collection of GRIB or NetCDF files, a
+   database, etc. See :ref:`dataset-sources` for more information.
+
+filter
+   A `filter` is a software component that takes as input the output of
+   a source or of another filter, and can modify the fields and/or
+   their metadata. For example, typical filters are interpolations,
+   renaming of variables, etc. See :ref:`dataset-filters` for more
+   information.
+
+************
+ Operations
+************
+
+In order to build a training dataset, sources and filters are combined
+using the following operations:
+
+join
+   The join is the process of combining data from several sources. Each
+   source is expected to provide different variables at the same dates.
+
+pipe
+   The pipe is the process of transforming fields using filters. The
+   first step of a pipe is typically a source, a join or another pipe.
+   The following steps are filters.
+
+concat
+   The concatenation is the process of combining different sets of
+   operations that handle different dates. This is typically used to
+   build a dataset that spans several years, when several sources are
+   involved, each providing a different period.
+
+*****************
+ Getting started
+*****************
+
+.. literalinclude:: building.yaml
+   :language: yaml
diff --git a/docs/datasets/building.yaml b/docs/datasets/building.yaml
new file mode 100644
index 0000000..c991100
--- /dev/null
+++ b/docs/datasets/building.yaml
@@ -0,0 +1,44 @@
+description: Example dataset
+
+dates:
+  start: 2020-01-01 00:00:00
+  end: 2023-12-31 18:00:00
+  frequency: 6h
+
+build:
+  group_by: monthly
+
+input:
+  join:
+    - mars:
+        class: ea
+        param: [10u, 10v, 2d, 2t, msl, skt, sp, tcw, lsm, sdor, slor, z]
+        levtype: sfc
+
+    - mars:
+        class: ea
+        param: [r, t, u, v, w, z]
+        levtype: pl
+        level: [50, 100, 150, 200, 250, 300, 400, 500, 700, 850, 925, 1000]
+
+    - constants:
+        template: ${input.join.0.mars}
+        param:
+        - cos_latitude
+        - cos_longitude
+        - sin_latitude
+        - sin_longitude
+        - cos_julian_day
+        - cos_local_time
+        - sin_julian_day
+        - sin_local_time
+        - insolation
+
+output:
+  order_by:
+    - valid_datetime
+    - param_level
+    - number
+  statistics: param_level
+  remapping:
+    param_level: "{param}_{levelist}"
diff --git a/docs/datasets/concat.png b/docs/datasets/concat.png
new file mode 100644
index 0000000..0832de9
Binary files /dev/null and b/docs/datasets/concat.png differ
diff --git a/docs/datasets/data.png b/docs/datasets/data.png
new file mode 100644
index 0000000..cea4090
Binary files /dev/null and b/docs/datasets/data.png differ
diff --git a/docs/datasets/filters.rst b/docs/datasets/filters.rst
new file mode 100644
index 0000000..aefac54
--- /dev/null
+++ b/docs/datasets/filters.rst
@@ -0,0 +1,5 @@
+.. _dataset-filters:
+
+#########
+ Filters
+#########
diff --git a/docs/datasets/images.pptx b/docs/datasets/images.pptx
new file mode 100644
index 0000000..b73a331
Binary files /dev/null and b/docs/datasets/images.pptx differ
diff --git a/docs/datasets/join.png b/docs/datasets/join.png
new file mode 100644
index 0000000..41c2082
Binary files /dev/null and b/docs/datasets/join.png differ
diff --git a/docs/datasets/options.rst b/docs/datasets/options.rst
new file mode 100644
index 0000000..a12e3c5
--- /dev/null
+++ b/docs/datasets/options.rst
@@ -0,0 +1,282 @@
+#########
+ Options
+#########
+
+These are equivalent:
+
+.. code:: python
+
+   ds = open_dataset(path)
+   ds = open_dataset(dataset=path)
+   ds = open_dataset({"dataset": path})
+
+The last example is useful when the dataset is defined from a
+configuration file:
+
+.. code:: python
+
+   with open("config.yaml") as file:
+       config = yaml.safe_load(file)
+
+   ds = open_dataset(config)
+
+When defining a dataset from another, you can either use a path or a
+dataset:
+
+.. code:: python
+
+   open_dataset(path, statistics=other_path)
+   open_dataset(path, statistics=other_dataset)
+   open_dataset(path, statistics={"dataset": other_path, ...})
+
+This also applies when combining datasets:
+
+.. code:: python
+
+   open_dataset(ensembles=[dataset1, dataset2, ...])
+   open_dataset(ensembles=[path1, path2, ...])
+   open_dataset(ensembles=[dataset1, path2, ...])
+   open_dataset(ensembles=[{"dataset": path1, ...}, {"dataset": path2, ...}, ...])
+
+*********
+ Options
+*********
+
+.. code:: python
+
+   open_dataset(
+       dataset,
+       start=None,
+       end=None,
+       frequency=None,
+       select=None,
+       drop=None,
+       reorder=None,
+       rename=None,
+       statistics=None,
+       thinning=None,
+       area=None,
+       ensembles=None,
+       grids=None,
+       method=None,
+   )
+
+dataset
+=======
+
+This is a path or URL to a ``zarr`` file that has been created with this
+package, as described in :ref:`datasets-building`.
+
+.. code:: python
+
+   from ecml_tools.data import open_dataset
+
+   ds = open_dataset("aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v2")
+   ds = open_dataset("/path/to/datasets/aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v2.zarr")
+   ds = open_dataset("https://example.com/aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v2.zarr")
+   ds = open_dataset("s3://bucket/aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v2.zarr")
+
+Alternatively, you can pass an already opened dataset:
+
+.. code:: python
+
+   from ecml_tools.data import open_dataset
+
+   ds1 = open_dataset("aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v2")
+   ds2 = open_dataset(ds1, start=1979, end=2020)
+
+start
+=====
+
+This option lets you subset the dataset by time. Pass a date or a string:
+
+.. code:: python
+
+   open_dataset(dataset, start=1980)
+
+end
+===
+
+As for the start option, you can pass a date or a string:
+
+.. code:: python
+
+   open_dataset(dataset, end="2020-12-31")
+
+The following are equivalent ways of describing ``start`` or ``end``:
+
+-  ``2020`` and ``"2020"``
+-  ``202306``, ``"202306"`` and ``"2023-06"``
+-  ``20200301``, ``"20200301"`` and ``"2020-03-01"``
+
+frequency
+=========
+
+You can change the frequency of the dataset by passing a string:
+
+.. code:: python
+
+   ds = open_dataset("aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v2", frequency="6h")
+
+select
+======
+
+.. code:: python
+
+   # Select '2t' and 'tp' in that order
+
+   ds = open_dataset(
+       "aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v2",
+       select = ["2t", "tp"],
+   )
+
+.. code:: python
+
+   # Select '2t' and 'tp', but preserve the order in which they are in the dataset
+
+   ds = open_dataset(
+       "aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v2",
+       select = {"2t", "tp"},
+   )
+
+drop
+====
+
+You can also drop some variables:
+
+.. code:: python
+
+   ds = open_dataset(
+       "aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v2",
+       drop = ["10u", "10v"],
+   )
+
+reorder
+=======
+
+and reorder them:
+
+... using a list
+
+.. code:: python
+
+   ds = open_dataset(
+       "aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v2",
+       reorder = ["2t", "msl", "sp", "10u", "10v"],
+   )
+
+... or using a dictionary
+
+.. code:: python
+
+   ds = open_dataset(
+       "aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v2",
+       reorder = {"2t": 0, "msl": 1, "sp": 2, "10u": 3, "10v": 4},
+   )
+
+rename
+======
+
+You can also rename variables:
+
+.. code:: python
+
+    ds = open_dataset(
+       "aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v2",
+       rename = {"2t": "t2m"},
+   )
+
+This will be useful when you join datasets and do not want variables
+from one dataset to override the ones from the other.
+
+.. _statistics:
+
+statistics
+==========
+
+.. code:: python
+
+   open_dataset(dataset, statistics=other_dataset)
+
+thinning
+========
+
+.. code:: python
+
+   open_dataset(dataset, thinning=..., method="every-nth")
+
+area
+====
+
+********************
+ Combining datasets
+********************
+
+When combining datasets, the statistics of the first dataset are used by
+default. You can change this by setting the :ref:`statistics` option to
+a different dataset, even if it is not part of the combination.
+
+concat
+======
+
+You can concatenate two or more datasets along the dates dimension. The
+package will check that all datasets are compatible (same resolution,
+same variables, etc.). Currently, the datasets must be given in
+chronological order with no gaps between them.
+
+.. code:: python
+
+   ds = open_dataset(
+       "aifs-ea-an-oper-0001-mars-o96-1940-1978-1h-v2",
+       "aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v2"
+   )
+
+.. image:: concat.png
+   :alt: Concatenation
+
+Please note that you can pass more than two ``zarr`` files to the
+function.
+
+   **NOTE:** When concatenating files, the statistics are not recomputed;
+   it is the statistics of the first file that are returned to the user.
+
+join
+====
+
+You can join two datasets that have the same dates, combining their
+variables.
+
+.. code:: python
+
+   from ecml_tools.data import open_dataset
+
+   ds = open_dataset(
+       "aifs-ea-an-oper-0001-mars-o96-1979-2022-1h-v2",
+       "some-extra-parameters-from-another-source-o96-1979-2022-1h-v2",
+   )
+
+.. image:: join.png
+   :alt: Join
+
+If a variable is present in more than one file, the last occurrence of
+that variable will be used, and will be at the position of the first
+occurrence of that name.
+
+.. image:: overlay.png
+   :alt: Overlay
+
+Please note that you can join more than two ``zarr`` files.
+
+ensembles
+=========
+
+.. code:: python
+
+   open_dataset(ensembles=[dataset1, dataset2, ...])
+
+grids
+=====
+
+.. code:: python
+
+   open_dataset(grids=[dataset1, dataset2, ...], method=...)
diff --git a/docs/datasets/overlay.png b/docs/datasets/overlay.png
new file mode 100644
index 0000000..e221539
Binary files /dev/null and b/docs/datasets/overlay.png differ
diff --git a/docs/datasets/sources.rst b/docs/datasets/sources.rst
new file mode 100644
index 0000000..914d913
--- /dev/null
+++ b/docs/datasets/sources.rst
@@ -0,0 +1,41 @@
+.. _dataset-sources:
+
+#########
+ Sources
+#########
+
+..
+   ******
+
+..
+   mars
+
+..
+   ******
+
+..
+   ******
+
+..
+   grib
+
+..
+   ******
+
+..
+   ********
+
+..
+   netcdf
+
+..
+   ********
+
+..
+   *********
+
+..
+   opendap
+
+..
+   *********
diff --git a/docs/datasets/using.rst b/docs/datasets/using.rst
new file mode 100644
index 0000000..e0d9900
--- /dev/null
+++ b/docs/datasets/using.rst
@@ -0,0 +1,3 @@
+################
+ Using datasets
+################
diff --git a/docs/examples.rst b/docs/examples.rst
new file mode 100644
index 0000000..8f14102
--- /dev/null
+++ b/docs/examples.rst
@@ -0,0 +1,14 @@
+.. _examples:
+
+##########
+ Examples
+##########
+
+Here is a list of example notebooks to illustrate how to access data,
+create plots, and do machine learning using Anemoi.
+
+.. toctree::
+   :maxdepth: 2
+   :glob:
+
+   examples/*
diff --git a/docs/examples/00-example1.ipynb b/docs/examples/00-example1.ipynb
new file mode 100644
index 0000000..dec02a7
--- /dev/null
+++ b/docs/examples/00-example1.ipynb
@@ -0,0 +1,25 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Hello"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/docs/firststeps.rst b/docs/firststeps.rst
new file mode 100644
index 0000000..efe3dde
--- /dev/null
+++ b/docs/firststeps.rst
@@ -0,0 +1,3 @@
+#############
+ First steps
+#############
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..8f48dbe
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,67 @@
+####################################
+ Welcome to Anemoi's documentation!
+####################################
+
+.. warning::
+
+   This documentation is work in progress. It is not yet ready.
+   Currently, the documentation is based on the one from the ecml-tools_
+   project, which will be merged into Anemoi.
+
+*Anemoi* is a framework for developing machine learning weather
+forecasting models. It comprises components or packages for preparing
+training datasets, conducting ML model training and a registry for
+datasets and trained models. Anemoi provides tools for operational
+inference, including interfacing to verification software. As a
+framework it seeks to handle many of the complexities that
+meteorological organisations will share, allowing them to easily train
+models from existing recipes but with their own data.
+
+-  :doc:`overview`
+-  :doc:`installing`
+-  :doc:`firststeps`
+-  :doc:`examples`
+
+.. toctree::
+   :maxdepth: 1
+   :hidden:
+
+   overview
+   installing
+   firststeps
+   examples
+
+**Datasets**
+
+-  :doc:`datasets/about`
+-  :doc:`datasets/building`
+-  :doc:`datasets/sources`
+-  :doc:`datasets/filters`
+-  :doc:`datasets/using`
+-  :doc:`datasets/options`
+
+.. toctree::
+   :maxdepth: 1
+   :hidden:
+   :caption: Training datasets
+
+   datasets/about
+   datasets/building
+   datasets/sources
+   datasets/filters
+   datasets/using
+   datasets/options
+
+*********
+ License
+*********
+
+*Anemoi* is available under the open source `Apache License`__.
+
+.. __: http://www.apache.org/licenses/LICENSE-2.0.html
+
+.. _ecml-tools: https://github.com/ecmwf-lab/ecml-tools
+
+.. _pytorch: https://pytorch.org
+
+.. _zarr: https://zarr.readthedocs.io/
diff --git a/docs/installing.rst b/docs/installing.rst
new file mode 100644
index 0000000..5ae334b
--- /dev/null
+++ b/docs/installing.rst
@@ -0,0 +1,3 @@
+############
+ Installing
+############
diff --git a/docs/overview.rst b/docs/overview.rst
new file mode 100644
index 0000000..5af7ff1
--- /dev/null
+++ b/docs/overview.rst
@@ -0,0 +1,6 @@
+##########
+ Overview
+##########
+
+This documentation is currently being written. For now, only the
+creation and use of training datasets is documented.
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000..c85e9f1
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,9 @@
+# These are the requirements for Read the Docs
+sphinx
+sphinx_rtd_theme
+nbsphinx
+
+# Also requires `brew install pandoc` on Mac
+pandoc
+
+rstfmt
diff --git a/ecml_tools/data/concat.py b/ecml_tools/data/concat.py
index 5830439..12fa790 100644
--- a/ecml_tools/data/concat.py
+++ b/ecml_tools/data/concat.py
@@ -6,6 +6,7 @@
 # nor does it submit to any jurisdiction.
 
 import logging
+from functools import cached_property
 
 import numpy as np
 
@@ -23,7 +24,8 @@
 LOG = logging.getLogger(__name__)
 
 
-class Concat(Combined):
+class ConcatMixin:
+
     def __len__(self):
         return sum(len(i) for i in self.datasets)
 
@@ -65,6 +67,18 @@ def _get_slice(self, s):
 
         return np.concatenate(result)
 
+    @cached_property
+    def missing(self):
+        """Union of each dataset's ``missing`` values, offset by the lengths before it."""
+        shifted, start = set(), 0
+        for dataset in self.datasets:
+            shifted.update(index + start for index in dataset.missing)
+            start += len(dataset)
+        return shifted
+
+
+class Concat(ConcatMixin, Combined):
+
     def check_compatibility(self, d1, d2):
         super().check_compatibility(d1, d2)
         self.check_same_sub_shapes(d1, d2, drop_axis=0)
diff --git a/ecml_tools/data/dataset.py b/ecml_tools/data/dataset.py
index f9be152..4706b18 100644
--- a/ecml_tools/data/dataset.py
+++ b/ecml_tools/data/dataset.py
@@ -83,6 +83,15 @@ def _subset(self, **kwargs):
             bbox = kwargs.pop("area")
             return Cropping(self, bbox)._subset(**kwargs)
 
+        # Keep last
+        if "shuffle" in kwargs:
+            from .subset import Subset
+
+            shuffle = kwargs.pop("shuffle")
+
+            if shuffle:
+                return Subset(self, self._shuffle_indices())._subset(**kwargs)
+
         raise NotImplementedError("Unsupported arguments: " + ", ".join(kwargs))
 
     def _frequency_to_indices(self, frequency):
@@ -96,6 +105,11 @@ def _frequency_to_indices(self, frequency):
 
         return range(0, len(self), step)
 
+    def _shuffle_indices(self):
+        """Return the indices ``0 .. len(self) - 1`` in a random order."""
+        from numpy.random import permutation
+        return permutation(len(self))
+
     def _dates_to_indices(self, start, end):
         from .misc import _as_first_date
         from .misc import _as_last_date
diff --git a/ecml_tools/data/misc.py b/ecml_tools/data/misc.py
index 6f4a83a..c522ba4 100644
--- a/ecml_tools/data/misc.py
+++ b/ecml_tools/data/misc.py
@@ -255,6 +255,16 @@ def _open_dataset(*args, zarr_root, **kwargs):
     for a in args:
         sets.append(_open(a, zarr_root))
 
+    if "zip" in kwargs:
+        from .unchecked import zip_factory
+
+        assert not sets, sets
+        return zip_factory(args, kwargs, zarr_root)
+    if "chain" in kwargs:
+        from .unchecked import chain_factory
+
+        assert not sets, sets
+        return chain_factory(args, kwargs, zarr_root)
     if "join" in kwargs:
         from .join import join_factory
 
diff --git a/ecml_tools/data/unchecked.py b/ecml_tools/data/unchecked.py
new file mode 100644
index 0000000..0b42367
--- /dev/null
+++ b/ecml_tools/data/unchecked.py
@@ -0,0 +1,170 @@
+# (C) Copyright 2024 European Centre for Medium-Range Weather Forecasts.
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+
+import logging
+from functools import cached_property
+from functools import wraps
+
+from .concat import ConcatMixin
+from .debug import Node
+from .forewards import Combined
+from .misc import _auto_adjust
+from .misc import _open
+
+LOG = logging.getLogger(__name__)
+
+
+class check:
+    """Decorator factory for lazily checked attributes of combined datasets.
+
+    ``check(name)`` wraps a placeholder method: on access, ``obj.<name>`` is
+    called with the first dataset and each other dataset, then the value of
+    the same-named ``Combined`` attribute is returned for ``obj``.
+    """
+
+    def __init__(self, check):
+        self.check = check
+
+    def __call__(self, method):
+        name = method.__name__
+        check = self.check
+
+        @wraps(method)
+        def wrapper(obj):
+            # A docstring here would be overwritten by ``wraps``; see the class.
+            for other in obj.datasets[1:]:
+                getattr(obj, check)(obj.datasets[0], other)
+            # The wrapped placeholder body is never run; defer to ``Combined``.
+            return getattr(Combined, name).__get__(obj)
+
+        return wrapper
+
+
+class Unchecked(Combined):
+    """Combined datasets whose ``check_compatibility`` accepts anything; some properties check on access."""
+
+    def tree(self):
+        return Node(self, [d.tree() for d in self.datasets])
+
+    def _subset(self, **kwargs):
+        assert not kwargs, kwargs  # no subsetting option is supported here
+        return self
+
+    def check_compatibility(self, d1, d2):
+        """Accept any pair of datasets (deliberately a no-op)."""
+
+    @property
+    @check("check_same_dates")
+    def dates(self):
+        raise NotImplementedError()  # never runs: ``check`` defers to Combined
+
+    @property
+    @check("check_same_resolution")
+    def resolution(self):
+        raise NotImplementedError()
+
+    @property
+    def field_shape(self):
+        raise NotImplementedError()
+
+    @property
+    @check("check_same_frequency")
+    def frequency(self):
+        raise NotImplementedError()
+
+    @property
+    @check("check_same_grid")
+    def latitudes(self):
+        raise NotImplementedError()
+
+    @property
+    @check("check_same_grid")
+    def longitudes(self):
+        raise NotImplementedError()
+
+    @property
+    @check("check_same_variables")
+    def name_to_index(self):
+        raise NotImplementedError()
+
+    @property
+    @check("check_same_variables")
+    def variables(self):
+        raise NotImplementedError()
+
+    @property
+    @check("check_same_variables")
+    def statistics(self):
+        raise NotImplementedError()
+
+    @property
+    def shape(self):
+        raise NotImplementedError()
+
+    @property
+    def dtype(self):
+        raise NotImplementedError()
+
+    @property
+    def grids(self):
+        raise NotImplementedError()
+
+
+class Zip(Unchecked):
+
+    def __len__(self):
+        return min(map(len, self.datasets))  # bounded by the shortest dataset
+
+    def __getitem__(self, n):
+        return tuple(dataset[n] for dataset in self.datasets)  # one item per dataset
+
+    @cached_property
+    def missing(self):
+        merged = set()  # union of every dataset's ``missing`` set
+        for dataset in self.datasets:
+            merged |= dataset.missing
+        return merged
+
+
+class Chain(ConcatMixin, Unchecked):
+    """
+    Same as Concat, but with no checks. NOTE(review): ``__getitem__`` mirrors ``Zip``'s; confirm.
+    """
+
+    def __len__(self):
+        return sum(len(d) for d in self.datasets)  # total length over all datasets
+
+    def __getitem__(self, n):
+        return tuple(d[n] for d in self.datasets)  # item ``n`` of every dataset, as a tuple
+
+    @property
+    def dates(self):
+        raise NotImplementedError()  # replaces the checked ``dates`` of Unchecked
+
+
+def zip_factory(args, kwargs, zarr_root):
+    """Build a ``Zip`` from the entries under ``kwargs["zip"]``, then apply the remaining kwargs."""
+    entries = kwargs.pop("zip")  # local name avoids shadowing the builtin ``zip``
+    assert len(args) == 0
+    assert isinstance(entries, (list, tuple))
+
+    opened = [_open(entry, zarr_root) for entry in entries]
+    opened, kwargs = _auto_adjust(opened, kwargs)
+
+    return Zip(opened)._subset(**kwargs)
+
+
+def chain_factory(args, kwargs, zarr_root):
+    """Build a ``Chain`` from the entries under ``kwargs["chain"]``, then apply the remaining kwargs."""
+    entries = kwargs.pop("chain")
+    assert len(args) == 0
+    assert isinstance(entries, (list, tuple))
+
+    opened = [_open(entry, zarr_root) for entry in entries]
+    opened, kwargs = _auto_adjust(opened, kwargs)
+
+    return Chain(opened)._subset(**kwargs)