Add scenarios to excel example (#201)

* Replace scenarios file with simplified version. Add more detailed 'user friendly' configuration file.
* Set parameter values for examples
* Add scenario example
* Create results folder
* Added f string, does not appear to fix the problem though. Need help.
* Fix line length flake8 check
* Rename scenarios file used in scenario_plots notebook
* Link to excel notebook as a quick start option from run.md
* Replace append with concat
* Fix concat syntax
* Recompute notebook
* retrigger checks

---------

Co-authored-by: Montgomery <12001003819257@FEDIDCARD.GOV>
ncsu-landscape-dynamics · Mar 4, 2024 · d4b4c50 · d4b4c50
1 parent adfd3d8
commit d4b4c50
Show file tree

Hide file tree

Showing 10 changed files with 385 additions and 63 deletions.
diff --git a/docs/run.md b/docs/run.md
@@ -4,6 +4,10 @@ The two basic ways to run the simulation are a Python package and command line
 interface. Both interfaces take simulation parameters as a configuration file
 and several other user inputs as function arguments or command line arguments.
 
+For a quick start option, use the Jupyter notebook
+[run_from_xlsx.ipynb](run_from_xlsx.ipynb) with the provided Excel spreadsheet
+template to configure the simulation.
+
 ## Configuration
 
 The configuration file contains all parameters needed for a single simulation

diff --git a/examples/notebooks/data/scenarios.csv b/examples/notebooks/data/scenarios.csv
@@ -0,0 +1,5 @@
+name,inspection/sample_strategy,inspection/hypergeometric/detection_level
+hypergeometric 0.01,hypergeometric,0.01
+hypergeometric 0.05,hypergeometric,0.05
+hypergeometric 0.1,hypergeometric,0.1
+proportion 0.02,proportion,
diff --git a/examples/notebooks/data/scenarios_config.csv → examples/notebooks/data/scenarios_long.csv b/examples/notebooks/data/scenarios_config.csv → examples/notebooks/data/scenarios_long.csv
diff --git a/examples/notebooks/data/user_friendly_config.xlsx b/examples/notebooks/data/user_friendly_config.xlsx
diff --git a/examples/notebooks/results/user_friendly_config_results.xlsx b/examples/notebooks/results/user_friendly_config_results.xlsx
diff --git a/examples/notebooks/run_from_xlsx.ipynb b/examples/notebooks/run_from_xlsx.ipynb
diff --git a/examples/notebooks/scenario_plots.ipynb b/examples/notebooks/scenario_plots.ipynb
@@ -34,7 +34,7 @@
     "from pathlib import Path\n",
     "datadir = Path(\"data\")\n",
     "basic_config = load_configuration(datadir / \"base_config.yml\")\n",
-    "scenario_table = load_scenario_table(datadir / \"scenarios_config.csv\")"
+    "scenario_table = load_scenario_table(datadir / \"scenarios_long.csv\")"
    ]
   },
   {

diff --git a/examples/notebooks/validation_plots.ipynb b/examples/notebooks/validation_plots.ipynb
@@ -187,7 +187,7 @@
    "outputs": [],
    "source": [
     "# Combine dataframes from both tests to view contamination rates\n",
-    "contaminate_validation_df = contaminate_validation_df_1.append(contaminate_validation_df_2)\n",
+    "contaminate_validation_df = pd.concat([contaminate_validation_df_1, contaminate_validation_df_2])\n",
     "contaminate_validation_df.index = range(len(contaminate_validation_df))"
    ]
   },
@@ -220,7 +220,7 @@
     "beta_expected = pd.Series(0.007)\n",
     "beta_expected = beta_expected.repeat(repeats=len(contaminate_validation_df_2))\n",
     "\n",
-    "expected_rates = fixed_expected.append(beta_expected)\n",
+    "expected_rates = pd.concat([fixed_expected, beta_expected])\n",
     "\n",
     "expected_rates.index = range(len(expected_rates))\n",
     "simulated_rates = contaminate_validation_df[\"true_contamination_rate\"]\n",

diff --git a/popsborder/contamination.py b/popsborder/contamination.py
@@ -239,7 +239,8 @@ def choose_strata_for_clusters(num_units, cluster_width, num_clusters):
     # Make sure there are enough strata for the number of clusters needed.
     if num_strata < num_clusters:
         raise ValueError(
-            """Cannot avoid overlapping clusters. Increase contaminated_units_per_cluster
+            """Cannot avoid overlapping clusters. Increase
+            contaminated_units_per_cluster
             or decrease cluster_item_width (if using item contamination_unit)"""
         )
     # If all strata are needed, all strata are selected for clusters

diff --git a/popsborder/simulation.py b/popsborder/simulation.py
@@ -169,7 +169,7 @@ def simulation(
         missing = 100 * float(success_rates.false_negative) / (num_contaminated)
         false_neg = success_rates.false_negative
         if verbose:
-            print("Missing {missing:.0f}% of contaminated consignments.")
+            print(f"Missing {missing:.0f}% of contaminated consignments.")
     else:
         # we didn't miss anything
         missing = 0