clean up tutorials, geniml docs, etc

databio · Feb 20, 2024 · 65091e6 · 65091e6
1 parent 2d3be3d
commit 65091e6
Show file tree

Hide file tree

Showing 31 changed files with 1,003 additions and 868 deletions.
diff --git a/autodoc.py b/autodoc.py
@@ -0,0 +1,102 @@
+# This script will auto-generate documentation for Python code, CLI usage, and Jupyter notebooks
+# It is intended to be run as a pre-build step in a MkDocs project
+# It will read the mkdocs.yml file for configuration
+# It will use the lucidoc package to auto-generate documentation for Python code
+# It will use the subprocess package to run CLI commands and capture the output
+# It will use the nbconvert package to convert Jupyter notebooks to markdown
+
+import lucidoc
+import yaml
+import subprocess
+import glob
+import nbconvert
+import os
+from pathlib import Path
+
+import argparse
+
+parser = argparse.ArgumentParser(description="Description of your program")
+parser.add_argument(
+    "--x-usage",
+    help="Exclude usage",
+    required=False,
+    default=False,
+    action="store_true",
+)
+parser.add_argument(
+    "--x-lucidoc",
+    help="Exclude lucidoc",
+    required=False,
+    default=False,
+    action="store_true",
+)
+parser.add_argument(
+    "--x-jupyter",
+    help="Exclude jupyter",
+    required=False,
+    default=False,
+    action="store_true",
+)
+
+args = vars(parser.parse_args())
+
+print(args)
+
+# Read the mkdocs config
+with open("mkdocs.yml") as stream:
+    cfg = yaml.safe_load(stream)
+
+
+if "autodoc" not in cfg:
+    print("No autodoc configuration found in mkdocs.yml")
+    exit(1)
+else:
+    cfg = cfg["autodoc"]
+
+# Process auto-documented Python code
+if args["x_lucidoc"] is False and "lucidoc" in cfg:
+    for bundle in cfg["lucidoc"]:
+        print(f"Documenting lucidoc '{bundle['pkg']}' at {bundle['outfile']}")
+        lucidoc.run_lucidoc(parse_style="rst", **bundle)
+else:
+    print("Skipping lucidoc")
+
+
+usage_tpl = """
+\n`{cmd}`
+\n
+```console
+{usage}
+```
+"""
+
+# Process CLI usage
+if args["x_usage"] is False and "cli_usage" in cfg:
+    for item in cfg["cli_usage"]:
+        result = ""
+        with open(item["template"], "r") as file:
+            result = file.read()
+        for cmd in item["commands"]:
+            print(f"Documenting command '{cmd}' to '{item['outfile']}'")
+            usage = subprocess.check_output(cmd, shell=True).decode("utf-8")
+            content = usage_tpl.format(cmd=cmd, usage=usage)
+            result += content
+        with open(item["outfile"], "w") as file:
+            file.write(result)
+else:
+    print("Skipping usage documentation")
+
+# # Render Juptyer notebooks to markdown
+if args["x_jupyter"] is False and "jupyter" in cfg:
+    for item in cfg["jupyter"]:
+        files = glob.glob(f"docs/{item['in']}/*.ipynb")
+        for nb in files:
+            bn, _ = os.path.splitext(os.path.basename(nb))
+            out = f"docs/{item['out']}/{bn}.md"
+            print(f"Converting '{nb}' to '{out}'")
+            md_result = nbconvert.exporters.export(nbconvert.MarkdownExporter(), nb)[0]
+            Path(os.path.dirname(out)).mkdir(parents=True, exist_ok=True)
+            with open(out, "w") as stream:
+                stream.write(md_result)
+else:
+    print("Skipping jupyter notebooks")
diff --git a/docs/README.md b/docs/README.md
@@ -1,5 +1,12 @@
+---
+hide:
+  - navigation
+  - toc
+  - navigation.footer
+---
+
 # Welcome to <https://docs.bedbase.org>
 
-This site hosts developer and user documentation for components of BEDbase and related tools. Use the tab navigation above to find the project of interest.
+This site hosts developer and user documentation for components of BEDbase and related tools, notably including [geniml](geniml/README.md), our package for machine learning on genomic intervals. Use the tab navigation above to find the project of interest.
 
 You can access the main BEDbase interface at <https://bedbase.org>.
diff --git a/docs/autodoc_build/bbconf_config.md → docs/bbconf/code/config.md b/docs/autodoc_build/bbconf_config.md → docs/bbconf/code/config.md
@@ -1,4 +1,3 @@
-jupyter:True
 # Bedbase configuration file documentation
 
 In order to start working with the `BedBaseConf` object, it has to be initialized first. The constructor requires one argument, which is a path to the configuration file (in YAML format).
@@ -18,14 +17,12 @@ Here's an example of a minimal bedbase configuration file:
 !cat ../tests/data/config_min.yaml
 ```
 
-```.output
-# min config example. Refer to bbconf/const.py for key names and default values
-
-path:
-  pipeline_output_path:  $HOME/bedbase
-  bedstat_dir: bedstat_output
-  bedbuncher_dir: bedbuncher_output
-```
+    # min config example. Refer to bbconf/const.py for key names and default values
+
+    path:
+      pipeline_output_path:  $HOME/bedbase
+      bedstat_dir: bedstat_output
+      bedbuncher_dir: bedbuncher_output
 
 ## Example config file
 
@@ -38,22 +35,20 @@ Here's an example of a complete bedbase configuration file:
 !cat ../tests/data/config.yaml
 ```
 
-```.output
-database:
-  name: pipestat-test
-  user: postgres
-  password: pipestat-password
-  host: localhost
-#  port: 5432; intentionally commented out to test the defaults setting system
-path:
-  pipeline_output_path: $BEDBASE_DATA_PATH/outputs
-  bedstat_dir: bedstat_output
-  bedbuncher_dir: bedbuncher_output
-  remote_url_base: null
-server:
-  host: 0.0.0.0
-  port: 8000
-```
+    database:
+      name: pipestat-test
+      user: postgres
+      password: pipestat-password
+      host: localhost
+    #  port: 5432; intentionally commented out to test the defaults setting system
+    path:
+      pipeline_output_path: $BEDBASE_DATA_PATH/outputs
+      bedstat_dir: bedstat_output
+      bedbuncher_dir: bedbuncher_output
+      remote_url_base: null
+    server:
+      host: 0.0.0.0
+      port: 8000
 
 ## Default values
 

diff --git a/docs/autodoc_build/bbconf_demo.md → docs/bbconf/code/demo.md b/docs/autodoc_build/bbconf_demo.md → docs/bbconf/code/demo.md
@@ -1,4 +1,3 @@
-jupyter:True
 # `BedBaseConf` object usage demonstration
 
 `bbconf` standardizes reporting of [bedstat](https://github.com/databio/bedstat) and [bedbuncher](https://github.com/databio/bedsbuncher) results. It formalizes a way for these pipelines and downstream tools to communicate -- the produced results can easily and reliably become an
@@ -16,10 +15,8 @@ from bbconf import *
 bbc = BedBaseConf(config_path="../tests/data/config.yaml")
 ```
 
-```.output
-DEBU 10:09:08 | bbconf:est:266 > Configured logger 'bbconf' using logmuse v0.2.6 
+    DEBU 10:09:08 | bbconf:est:266 > Configured logger 'bbconf' using logmuse v0.2.6 
 
-```
 
 As you can see above, missing entries are populated with default values.
 
@@ -36,14 +33,12 @@ The `PipestatManager` instances for bedfiles and bedsets can be accessed via the
 print(bbc.bed)
 ```
 
-```.output
-PipestatManager (bedfiles)
-Backend: PostgreSQL
-Results schema source: /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/bbconf/schemas/bedfiles_schema.yaml
-Status schema source: /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pipestat/schemas/status_schema.yaml
-Records count: 11
+    PipestatManager (bedfiles)
+    Backend: PostgreSQL
+    Results schema source: /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/bbconf/schemas/bedfiles_schema.yaml
+    Status schema source: /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pipestat/schemas/status_schema.yaml
+    Records count: 11
 
-```
 
 ### `BedBaseConf.bedset`:
 
@@ -52,14 +47,12 @@ Records count: 11
 print(bbc.bedset)
 ```
 
-```.output
-PipestatManager (bedsets)
-Backend: PostgreSQL
-Results schema source: /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/bbconf/schemas/bedsets_schema.yaml
-Status schema source: /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pipestat/schemas/status_schema.yaml
-Records count: 3
+    PipestatManager (bedsets)
+    Backend: PostgreSQL
+    Results schema source: /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/bbconf/schemas/bedsets_schema.yaml
+    Status schema source: /Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pipestat/schemas/status_schema.yaml
+    Records count: 3
 
-```
 
 ### `BedBaseConf.config`:
 
@@ -70,23 +63,21 @@ Additionally, there's a `BedBaseConf.config` property, that can be used to retri
 print(bbc.config)
 ```
 
-```.output
-database:
-  name: pipestat-test
-  user: postgres
-  password: pipestat-password
-  host: localhost
-  port: 5432
-path:
-  pipeline_output_path: $BEDBASE_DATA_PATH/outputs
-  bedstat_dir: bedstat_output
-  bedbuncher_dir: bedbuncher_output
-  remote_url_base: null
-server:
-  host: 0.0.0.0
-  port: 8000
+    database:
+      name: pipestat-test
+      user: postgres
+      password: pipestat-password
+      host: localhost
+      port: 5432
+    path:
+      pipeline_output_path: $BEDBASE_DATA_PATH/outputs
+      bedstat_dir: bedstat_output
+      bedbuncher_dir: bedbuncher_output
+      remote_url_base: null
+    server:
+      host: 0.0.0.0
+      port: 8000
 
-```
 
 ## Running a database
 
@@ -108,11 +99,9 @@ print(bbc.bed.schema["name"])
 print(bbc.bed.schema["widths_histogram"])
 ```
 
-```.output
-{'type': 'string', 'description': 'BED file name'}
-{'type': 'image', 'description': 'Quantile-trimmed histogram of widths'}
+    {'type': 'string', 'description': 'BED file name'}
+    {'type': 'image', 'description': 'Quantile-trimmed histogram of widths'}
 
-```
 
 A result of type `image` is in fact a mapping with three required elements: `path`, `thumbnail_path` and `title`. The actual jsonschema schemas can be accessed as `result_schemas` property for both tables:
 
@@ -158,10 +147,8 @@ bbc.bed.record_count
 bbc.bed.report(record_identifier="78c0e4753d04b238fc07e4ebe5a02984", values={"name": "some_name"})
 ```
 
-```.output
-These results exist for '78c0e4753d04b238fc07e4ebe5a02984': ['name']
+    These results exist for '78c0e4753d04b238fc07e4ebe5a02984': ['name']
 
-```
 
 
 
@@ -216,11 +203,9 @@ Let's try reporting a new bedfile then:
 bbc.bed.report(record_identifier="78c1e4111d04b238fc11e4ebe5a02984", values={"name": "some_name"})
 ```
 
-```.output
-Reported records for '78c1e4111d04b238fc11e4ebe5a02984' in 'bedfiles' namespace:
- - name: some_name
+    Reported records for '78c1e4111d04b238fc11e4ebe5a02984' in 'bedfiles' namespace:
+     - name: some_name
 
-```
 
 
 
@@ -266,10 +251,8 @@ Naturally, a record can be removed:
 bbc.bed.remove(record_identifier="78c1e4111d04b238fc11e4ebe5a02984") 
 ```
 
-```.output
-Removing '78c1e4111d04b238fc11e4ebe5a02984' record
+    Removing '78c1e4111d04b238fc11e4ebe5a02984' record
 
-```
 
 
 

diff --git a/docs/bbconf/config.ipynb → docs/bbconf/notebooks/config.ipynb b/docs/bbconf/config.ipynb → docs/bbconf/notebooks/config.ipynb
diff --git a/docs/bbconf/demo.ipynb → docs/bbconf/notebooks/demo.ipynb b/docs/bbconf/demo.ipynb → docs/bbconf/notebooks/demo.ipynb
diff --git a/docs/autodoc_build/bedbase_tutorial.md → docs/bedboss/code/bedbase-tutorial.md b/docs/autodoc_build/bedbase_tutorial.md → docs/bedboss/code/bedbase-tutorial.md
diff --git a/docs/autodoc_build/bedmaker_tutorial.md → docs/bedboss/code/bedmaker-tutorial.md b/docs/autodoc_build/bedmaker_tutorial.md → docs/bedboss/code/bedmaker-tutorial.md
diff --git a/docs/autodoc_build/bedqc_tutorial.md → docs/bedboss/code/bedqc-tutorial.md b/docs/autodoc_build/bedqc_tutorial.md → docs/bedboss/code/bedqc-tutorial.md
diff --git a/docs/autodoc_build/bedstat_tutorial.md → docs/bedboss/code/bedstat-tutorial.md b/docs/autodoc_build/bedstat_tutorial.md → docs/bedboss/code/bedstat-tutorial.md
diff --git a/docs/autodoc_build/tutorial_all.md → docs/bedboss/code/tutorial-all.md b/docs/autodoc_build/tutorial_all.md → docs/bedboss/code/tutorial-all.md
diff --git a/docs_jupyter/bedbase_tutorial.ipynb → .../bedboss/notebooks/bedbase-tutorial.ipynb b/docs_jupyter/bedbase_tutorial.ipynb → .../bedboss/notebooks/bedbase-tutorial.ipynb
diff --git a/docs_jupyter/bedmaker_tutorial.ipynb → ...bedboss/notebooks/bedmaker-tutorial.ipynb b/docs_jupyter/bedmaker_tutorial.ipynb → ...bedboss/notebooks/bedmaker-tutorial.ipynb
diff --git a/docs_jupyter/bedqc_tutorial.ipynb → docs/bedboss/notebooks/bedqc-tutorial.ipynb b/docs_jupyter/bedqc_tutorial.ipynb → docs/bedboss/notebooks/bedqc-tutorial.ipynb
diff --git a/docs_jupyter/bedstat_tutorial.ipynb → .../bedboss/notebooks/bedstat-tutorial.ipynb b/docs_jupyter/bedstat_tutorial.ipynb → .../bedboss/notebooks/bedstat-tutorial.ipynb
diff --git a/docs_jupyter/tutorial_all.ipynb → docs/bedboss/notebooks/tutorial-all.ipynb b/docs_jupyter/tutorial_all.ipynb → docs/bedboss/notebooks/tutorial-all.ipynb
diff --git a/docs/bedhost/README.md b/docs/bedhost/README.md
@@ -31,10 +31,10 @@ Object IDs take the form `<record_type>.<record_identifier>.<result_id>`. An exa
 
 So, you can get information about this object like this:
 
-`GET` [/objects/bed.eaf9ee97241f300f1c7e76e1f945141f.bedfile](/objects/bed.eaf9ee97241f300f1c7e76e1f945141f.bedfile)
+`GET` [https://api.bedbase.org/objects/bed.eaf9ee97241f300f1c7e76e1f945141f.bedfile](https://api.bedbase.org/objects/bed.eaf9ee97241f300f1c7e76e1f945141f.bedfile)
 
 Or, you can get a URL to download the actual file with:
 
-`GET` [/objects/bed.eaf9ee97241f300f1c7e76e1f945141f.bedfile/access/http](/objects/bed.eaf9ee97241f300f1c7e76e1f945141f.bedfile/access/http)
+`GET` [https://api.bedbase.org/objects/bed.eaf9ee97241f300f1c7e76e1f945141f.bedfile/access/http](https://api.bedbase.org/objects/bed.eaf9ee97241f300f1c7e76e1f945141f.bedfile/access/http)
 
 
diff --git a/docs/bedhost/about.md b/docs/bedhost/about.md
diff --git a/docs/citations.md b/docs/citations.md
@@ -13,6 +13,10 @@ Thanks for citing us! If you use BEDbase, geniml, or their components in your re
 |---------------|-----------------|
 | `geniml` region set evaluations | Zheng et al. (2023) *bioRxiv* |
 | `region2vec` embeddings | Gharavi et al. (2021) *Bioinformatics* |
+| `bedspace` search and embeddings | Gharavi et al. (2023) *bioRxiv* |
+| `geniml hmm` module | Rymuza et al. (2023) *bioRxiv* |
+| `bedbase` database | Unpublished |
+| `scEmbed` single-cell embedding framework | LeRoy et al. (2023) *bioRxiv* |
 
 
 

diff --git a/docs/geniml/README.md b/docs/geniml/README.md
@@ -1,19 +1,24 @@
-# Genomic interval toolkit
+# <img src="img/geniml_logo_horizontal.svg" class="img-header">
+
+<p align="center">
+<a href="https://img.shields.io/pypi/v/geniml"><img src="https://img.shields.io/pypi/v/geniml"></a>
+<a href="https://github.com/databio/geniml"><img src="https://img.shields.io/badge/source-github-354a75?logo=github"></a>
+</p>
+
+
 
 ## Introduction
 
-Geniml is a python package for building machine learning models of genomic interval data (BED files). It also includes ancillary functions to support other types of analyses of genomic interval data.
+Geniml is a *genomic interval machine learning toolkit*, a Python package for building machine learning models of genomic interval data (BED files). It also includes ancillary functions to support other types of analyses of genomic interval data.
+
+As of February 2024, this package and its documentation are undergoing rapid development, leading to some tutorials getting outdated. Please raise [github issues](https://github.com/databio/geniml) if you find outdated or unclear directions, so we know where to focus effort that will benefit users.
 
 ## Install
 
 ```
-pip install --user --upgrade .
+pip install --user --upgrade geniml
 ```
 
 ## Modules
 
 `geniml` is organized into modules. The next section is an overview of each module. You can also proceed to the how-to guides for recipes on how to do specific tasks. 
-
-## Citing
-
-If you find `geniml` useful for your research, please cite us! It helps us convince others that our work is useful. You can find a [published papers describing geniml components](manuscripts.md).
diff --git a/docs/geniml/contributing.md b/docs/geniml/contributing.md
@@ -56,5 +56,5 @@ geniml.hmm.function()
 
 ### Shared code
 
-Any variables, functions, or other code that is shared across modules should be placed in the parent module, which is held in the [geniml](geniml) folder.
+Any variables, functions, or other code that is shared across modules should be placed in the parent module, which is held in the geniml folder.