From 6acef240f2f534115a6b78c54b4d88e41e5e8916 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 10 Jul 2024 16:02:57 +0200 Subject: [PATCH 1/3] Add Datacard.segment_duration_distribution() --- audbcards/core/datacard.py | 77 +++++++++++++++++++ audbcards/core/templates/datacard_header.j2 | 3 + .../rendered_templates/medium_db.rst | 1 + 3 files changed, 81 insertions(+) diff --git a/audbcards/core/datacard.py b/audbcards/core/datacard.py index ba1cf127..02df86d9 100644 --- a/audbcards/core/datacard.py +++ b/audbcards/core/datacard.py @@ -339,6 +339,82 @@ def save(self, file: str = None): with open(file, mode="w", encoding="utf-8") as fp: fp.write(self.content) + @property + def segment_duration_distribution(self) -> str: + r"""Minimum and maximum of segment durations, and plotted distribution. + + This generates a single line + containing the minimum and maximum values + of segment durations. + + If :attr:`audbcards.Datacard.sphinx_src_dir` is not ``None`` + (e.g. when used in the sphinx extension), + an image is stored in the file + ``<dataset-name>-<dataset-version>-segment-duration-distribution.png``, + which is cached in + ``<cache-root>/<dataset-name>/<dataset-version>/`` + and copied to the sphinx source folder + into + ``<sphinx-src-dir>/<path>/<dataset-name>/``. + The image is displayed inline + between the minimum and maximum values. + If all duration values are the same, + no distribution plot is created. + + """ + file_name = ( + f"{self.dataset.name}-{self.dataset.version}" + "-segment-duration-distribution.png" + ) + # Cache is organized as `<cache_root>/<name>/<version>/` + cache_file = audeer.path( + self.cache_root, + self.dataset.name, + self.dataset.version, + file_name, + ) + + min_ = 0 + max_ = 0 + unit = "s" + durations = self.dataset.segment_durations + if len(durations) > 0: + min_ = np.min(durations) + max_ = np.max(durations) + + # Skip creating a distribution plot, + # if all durations are the same + if min_ == max_: + return f"each file is {max_:.1f} {unit}" + + distribution_str = f"{min_:.1f} {unit} .. 
{max_:.1f} {unit}" + + # Save distribution plot + if self.sphinx_src_dir is not None: + # Plot distribution to cache, + # if not found there already. + if not os.path.exists(cache_file): + audeer.mkdir(os.path.dirname(cache_file)) + self._plot_distribution(durations) + plt.savefig(cache_file, transparent=True) + plt.close() + + image_file = audeer.path( + self.sphinx_src_dir, + self.path, + self.dataset.name, + file_name, + ) + audeer.mkdir(os.path.dirname(image_file)) + shutil.copyfile(cache_file, image_file) + distribution_str = self._inline_image( + f"{min_:.1f} {unit}", + f"./{self.dataset.name}/{file_name}", + f"{max_:.1f} {unit}", + ) + + return distribution_str + def _inline_image( self, text1: str, @@ -442,6 +518,7 @@ def _expand_dataset( player = self.player() dataset["player"] = player dataset["file_duration_distribution"] = self.file_duration_distribution + dataset["segment_duration_distribution"] = self.segment_duration_distribution return dataset def _render_template(self) -> str: diff --git a/audbcards/core/templates/datacard_header.j2 b/audbcards/core/templates/datacard_header.j2 index f36a25fe..f8a309a5 100644 --- a/audbcards/core/templates/datacard_header.j2 +++ b/audbcards/core/templates/datacard_header.j2 @@ -26,6 +26,9 @@ sampling rate {{ sampling_rates|join(', ') }} bit depth {{ bit_depths|join(', ') }} duration {{ duration }} files {{ files }}, duration distribution: {{ file_duration_distribution }} +{% if segments != "0" %} +segments {{ segments }}, duration distribution: {{ segment_duration_distribution }} +{% endif %} repository `{{ repository }} <{{ repository_link }}>`__ published {{ publication_date }} by {{ publication_owner }} ============= ====================== diff --git a/tests/test_data/rendered_templates/medium_db.rst b/tests/test_data/rendered_templates/medium_db.rst index dc1429b6..609dee8e 100644 --- a/tests/test_data/rendered_templates/medium_db.rst +++ b/tests/test_data/rendered_templates/medium_db.rst @@ -19,6 +19,7 @@ 
sampling rate 8000 bit depth 16 duration 0 days 00:05:02 files 2, duration distribution: 1.0 s |medium_db-1.0.0-file-duration-distribution| 301.0 s +segments 4, duration distribution: 0.5 s |medium_db-1.0.0-segment-duration-distribution| 151.0 s repository `data-local <.../data-local/medium_db>`__ published 2023-04-05 by author ============= ====================== From f871fa1f82747899e104a70f9d0417863fa208f0 Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Wed, 10 Jul 2024 16:06:24 +0200 Subject: [PATCH 2/3] Fix test --- tests/test_data/rendered_templates/medium_db.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_data/rendered_templates/medium_db.rst b/tests/test_data/rendered_templates/medium_db.rst index 609dee8e..b5c2bb18 100644 --- a/tests/test_data/rendered_templates/medium_db.rst +++ b/tests/test_data/rendered_templates/medium_db.rst @@ -1,4 +1,5 @@ .. |medium_db-1.0.0-file-duration-distribution| image:: ./medium_db/medium_db-1.0.0-file-duration-distribution.png +.. |medium_db-1.0.0-segment-duration-distribution| image:: ./medium_db/medium_db-1.0.0-segment-duration-distribution.png .. _datasets-medium_db: From e7dc5c9bf9de5bcf88c7e0c2d74d92a694305acc Mon Sep 17 00:00:00 2001 From: Hagen Wierstorf Date: Fri, 26 Jul 2024 16:19:30 +0200 Subject: [PATCH 3/3] Documenting behavior for missing segments --- audbcards/core/datacard.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/audbcards/core/datacard.py b/audbcards/core/datacard.py index 02df86d9..52e20018 100644 --- a/audbcards/core/datacard.py +++ b/audbcards/core/datacard.py @@ -106,6 +106,7 @@ def file_duration_distribution(self) -> str: If :attr:`audbcards.Datacard.sphinx_src_dir` is not ``None`` (e.g. 
when used in the sphinx extension), + and the dataset contains audio or video files, an image is stored in the file ``<dataset-name>-<dataset-version>-file-duration-distribution.png``, which is cached in @@ -349,6 +350,7 @@ def segment_duration_distribution(self) -> str: If :attr:`audbcards.Datacard.sphinx_src_dir` is not ``None`` (e.g. when used in the sphinx extension), + and the dataset contains segments, an image is stored in the file ``<dataset-name>-<dataset-version>-segment-duration-distribution.png``, which is cached in