From bac38a4613a7155dd76584e402bcf547429fa03e Mon Sep 17 00:00:00 2001
From: Stephen Rosen <sirosen@globus.org>
Date: Tue, 15 Aug 2023 11:49:53 -0500
Subject: [PATCH] Introduce 'best deep match' heuristic

Also print a short hint, when not in verbose mode, that `--verbose` can
be used to see all errors.
---
 src/check_jsonschema/reporter.py | 43 ++++++++++++++++++++++++++++++++
 tests/unit/test_reporters.py     |  6 +++++
 2 files changed, 49 insertions(+)

diff --git a/src/check_jsonschema/reporter.py b/src/check_jsonschema/reporter.py
index db619176b..3303ae1db 100644
--- a/src/check_jsonschema/reporter.py
+++ b/src/check_jsonschema/reporter.py
@@ -77,12 +77,33 @@ def _show_validation_error(
         if err.context:
             best_match = jsonschema.exceptions.best_match(err.context)
             self._echo("Underlying errors caused this.", indent=2)
+            self._echo("")
             self._echo("Best Match:", indent=2)
             self._echo(self._format_validation_error_message(best_match), indent=4)
+
+            best_deep_match = find_best_deep_match(err)
+            if best_deep_match != best_match:
+                self._echo("Best Deep Match:", indent=2)
+                self._echo(
+                    self._format_validation_error_message(best_deep_match), indent=4
+                )
+
             if self.verbosity > 1:
                 self._echo("All Errors:", indent=2)
                 for e in iter_validation_error(err):
                     self._echo(self._format_validation_error_message(e), indent=4)
+            else:
+                num_other_errors = len(list(iter_validation_error(err))) - 1
+                if best_deep_match != best_match:
+                    num_other_errors -= 1
+                if num_other_errors > 0:
+                    self._echo("")
+                    self._echo(
+                        f"{click.style(str(num_other_errors), fg='yellow')} other "
+                        "errors were produced. "
+                        "Use '--verbose' to see all errors.",
+                        indent=2,
+                    )
 
     def _show_parse_error(self, filename: str, err: ParseError) -> None:
         if self.verbosity < 2:
@@ -139,10 +160,17 @@ def _dump_error_map(
                 }
                 if err.context:
                     best_match = jsonschema.exceptions.best_match(err.context)
+                    best_deep_match = find_best_deep_match(err)
                     item["best_match"] = {
                         "path": best_match.json_path,
                         "message": best_match.message,
                     }
+                    item["best_deep_match"] = {
+                        "path": best_deep_match.json_path,
+                        "message": best_deep_match.message,
+                    }
+                    num_sub_errors = len(list(iter_validation_error(err))) - 1
+                    item["num_sub_errors"] = num_sub_errors
                     if self.verbosity > 1:
                         item["sub_errors"] = [
                             {"path": suberr.json_path, "message": suberr.message}
@@ -176,3 +204,18 @@ def report_errors(self, result: CheckResult) -> None:
     "text": TextReporter,
     "json": JsonReporter,
 }
+
+
+def _deep_match_relevance(error: jsonschema.ValidationError) -> tuple[bool | int, ...]:
+    validator = error.validator  # compared against the "anyOf"/"oneOf" names below
+    return (  # ranking tuple; find_best_deep_match passes this function as max() key
+        validator not in ("anyOf", "oneOf"),  # True outranks False: avoid anyOf/oneOf
+        len(error.absolute_path),  # next, a longer absolute_path ranks higher
+        -len(error.path),  # last, a shorter path ranks higher (negated for max())
+    )
+
+
+def find_best_deep_match(  # return the error that _deep_match_relevance ranks highest
+    errors: jsonschema.ValidationError,  # a single error, despite the plural name
+) -> jsonschema.ValidationError:  # on ties, max() keeps the first error it encounters
+    return max(iter_validation_error(errors), key=_deep_match_relevance)
diff --git a/tests/unit/test_reporters.py b/tests/unit/test_reporters.py
index b5a7bc62f..cdeef35af 100644
--- a/tests/unit/test_reporters.py
+++ b/tests/unit/test_reporters.py
@@ -130,6 +130,11 @@ def test_text_print_validation_error_nested(capsys, verbosity):
         assert "$.foo: {} is not of type 'string'" in captured.out
         assert "$.bar: {'baz': 'buzz'} is not of type 'string'" in captured.out
         assert "$.bar.baz: 'buzz' is not of type 'integer'" in captured.out
+    else:
+        assert (
+            "4 other errors were produced. Use '--verbose' to see all errors."
+            in captured.out
+        )
 
 
 @pytest.mark.parametrize("pretty_json", (True, False))
@@ -187,6 +192,7 @@ def test_json_format_validation_error_nested(capsys, pretty_json, verbosity):
     assert len(data["errors"]) == 1
     assert "is not valid under any of the given schemas" in data["errors"][0]["message"]
     assert data["errors"][0]["has_sub_errors"]
+    assert data["errors"][0]["num_sub_errors"] == 5
 
     # stop here unless 'verbosity>=2'
     if verbosity < 2: