From 9481d2a725c0354cf8a3c7eca8b4d87076bf9c8c Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Sat, 29 Jul 2023 22:22:09 -0500 Subject: [PATCH] Introduce remote $ref tests and fix URL join logic Any relative ref path needs to be URL-joined with the base URI before it is retrieved. That join was not being done correctly, as revealed by a new test case. Fixing it showed that the local file loading was more indirect than it now needs to be, so local file handling in the ref resolver has been refactored as well. --- .../schema_loader/resolver.py | 26 +++---- .../acceptance/test_remote_ref_resolution.py | 76 +++++++++++++++++++ 2 files changed, 87 insertions(+), 15 deletions(-) create mode 100644 tests/acceptance/test_remote_ref_resolution.py diff --git a/src/check_jsonschema/schema_loader/resolver.py b/src/check_jsonschema/schema_loader/resolver.py index 6207b30cf..5d658eadc 100644 --- a/src/check_jsonschema/schema_loader/resolver.py +++ b/src/check_jsonschema/schema_loader/resolver.py @@ -1,6 +1,5 @@ from __future__ import annotations -import pathlib import typing as t import urllib.parse @@ -38,25 +37,22 @@ def create_retrieve_callable( parser_set: ParserSet, schema_uri: str | None ) -> t.Callable[[str], referencing.Resource[Schema]]: def get_local_file(uri: str) -> t.Any: - path = pathlib.Path(uri) - if not path.is_absolute(): - if schema_uri is None: - raise referencing.exceptions.Unretrievable( - f"Cannot retrieve schema reference data for '{uri}' from " - "local filesystem. " - "The path appears relative, but there is no known local base path." 
- ) - schema_path = filename2path(schema_uri) - path = schema_path.parent / path + path = filename2path(uri) return parser_set.parse_file(path, "json") def retrieve_reference(uri: str) -> referencing.Resource[Schema]: scheme = urllib.parse.urlsplit(uri).scheme - if scheme in ("http", "https"): - data = requests.get(uri, stream=True) - parsed_object = parser_set.parse_data_with_path(data.raw, uri, "json") + if scheme == "" and schema_uri is not None: + full_uri = urllib.parse.urljoin(schema_uri, uri) else: - parsed_object = get_local_file(uri) + full_uri = uri + + full_uri_scheme = urllib.parse.urlsplit(full_uri).scheme + if full_uri_scheme in ("http", "https"): + data = requests.get(full_uri, stream=True) + parsed_object = parser_set.parse_data_with_path(data.raw, full_uri, "json") + else: + parsed_object = get_local_file(full_uri) return referencing.Resource.from_contents( parsed_object, default_specification=DRAFT202012 diff --git a/tests/acceptance/test_remote_ref_resolution.py b/tests/acceptance/test_remote_ref_resolution.py new file mode 100644 index 000000000..626b93bf9 --- /dev/null +++ b/tests/acceptance/test_remote_ref_resolution.py @@ -0,0 +1,76 @@ +import json + +import pytest +import responses + +from check_jsonschema import cachedownloader + +CASES = { + "case1": { + "main_schema": { + "$schema": "http://json-schema.org/draft-07/schema", + "properties": { + "title": {"$ref": "./title_schema.json"}, + }, + "additionalProperties": False, + }, + "other_schemas": {"title_schema": {"type": "string"}}, + "passing_document": {"title": "doc one"}, + "failing_document": {"title": 2}, + }, + "case2": { + "main_schema": { + "$schema": "http://json-schema.org/draft-07/schema", + "type": "object", + "required": ["test"], + "properties": {"test": {"$ref": "./values.json#/$defs/test"}}, + }, + "other_schemas": { + "values": { + "$schema": "http://json-schema.org/draft-07/schema", + "$defs": {"test": {"type": "string"}}, + } + }, + "passing_document": {"test": "some 
data"}, + "failing_document": {"test": {"foo": "bar"}}, + }, +} + + +@pytest.mark.parametrize("check_passes", (True, False)) +@pytest.mark.parametrize("casename", ("case1", "case2")) +def test_remote_ref_resolution_simple_case( + run_line, check_passes, casename, tmp_path, monkeypatch +): + def _fake_compute_default_cache_dir(self): + return str(tmp_path) + + monkeypatch.setattr( + cachedownloader.CacheDownloader, + "_compute_default_cache_dir", + _fake_compute_default_cache_dir, + ) + + main_schema_loc = "https://example.com/main.json" + responses.add("GET", main_schema_loc, json=CASES[casename]["main_schema"]) + for name, subschema in CASES[casename]["other_schemas"].items(): + other_schema_loc = f"https://example.com/{name}.json" + responses.add("GET", other_schema_loc, json=subschema) + + instance_path = tmp_path / "instance.json" + instance_path.write_text( + json.dumps( + CASES[casename]["passing_document"] + if check_passes + else CASES[casename]["failing_document"] + ) + ) + + result = run_line( + ["check-jsonschema", "--schemafile", main_schema_loc, str(instance_path)] + ) + output = f"\nstdout:\n{result.stdout}\n\nstderr:\n{result.stderr}" + if check_passes: + assert result.exit_code == 0, output + else: + assert result.exit_code == 1, output