diff --git a/.github/workflows/develop-test.yml b/.github/workflows/develop-test.yml
index 762113d1..a9575e54 100644
--- a/.github/workflows/develop-test.yml
+++ b/.github/workflows/develop-test.yml
@@ -42,6 +42,7 @@ jobs:
           python tests/batch.py
           python tests/predict.py
           python tests/precompute/fast_posterior_mean.py
+          python tests/scale_opt.py
       - name: Optimize Tests
         if: matrix.test-group == 'optimize'
         run: python tests/optimize.py
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 33ee47e1..da323051 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -40,6 +40,7 @@ all_tests:
     - salloc -N1 -ppvis -A muygps --mpibind=on python tests/neighbors.py
     - salloc -N1 -ppvis -A muygps --mpibind=on python tests/kernels.py
     - salloc -N1 -ppvis -A muygps --mpibind=on python tests/gp.py
+    - salloc -N1 -ppvis -A muygps --mpibind=on python tests/scale_opt.py
     - salloc -N1 -ppvis -A muygps --mpibind=on python tests/optimize.py
     - salloc -N1 -ppvis -A muygps --mpibind=on python tests/predict.py
     - salloc -N1 -ppvis -A muygps --mpibind=on python tests/multivariate.py
@@ -52,6 +53,7 @@ all_tests:
     - echo "performing MPI tests"
    - salloc -N1 --tasks-per-node=36 -ppvis -A muygps --mpibind=on python tests/kernels.py
    - salloc -N1 --tasks-per-node=36 -ppvis -A muygps --mpibind=on python tests/gp.py
+   - salloc -N1 --tasks-per-node=36 -ppvis -A muygps --mpibind=on python tests/scale_opt.py
    - salloc -N1 --tasks-per-node=36 -ppvis -A muygps --mpibind=on python tests/optimize.py
    - salloc -N1 --tasks-per-node=36 -ppvis -A muygps --mpibind=on python tests/predict.py
    - salloc -N1 --tasks-per-node=36 -ppvis -A muygps --mpibind=on python tests/multivariate.py
diff --git a/MuyGPyS/_src/optimize/scale/__init__.py b/MuyGPyS/_src/optimize/scale/__init__.py
index c55f7f0e..b438e0bb 100644
--- a/MuyGPyS/_src/optimize/scale/__init__.py
+++ b/MuyGPyS/_src/optimize/scale/__init__.py
@@ -5,7 +5,11 @@
 
 from MuyGPyS._src.util import _collect_implementation
 
-[_analytic_scale_optim] = _collect_implementation(
+[
+    _analytic_scale_optim,
+    _analytic_scale_optim_unnormalized,
+] = _collect_implementation(
     "MuyGPyS._src.optimize.scale",
     "_analytic_scale_optim",
+    "_analytic_scale_optim_unnormalized",
 )
diff --git a/MuyGPyS/gp/hyperparameter/__init__.py b/MuyGPyS/gp/hyperparameter/__init__.py
index 4897d66c..aed66b78 100644
--- a/MuyGPyS/gp/hyperparameter/__init__.py
+++ b/MuyGPyS/gp/hyperparameter/__init__.py
@@ -5,4 +5,4 @@
 
 from .scalar import Parameter, Parameter as ScalarParam
 from .tensor import TensorParam
-from .scale import AnalyticScale, FixedScale, ScaleFn
+from .scale import AnalyticScale, DownSampleScale, FixedScale, ScaleFn
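For orientation, `_collect_implementation` is the library's backend-dispatch idiom: it looks up same-named callables in the implementation module matching the active math backend. A rough sketch of the pattern under that assumption — not the actual helper in `MuyGPyS._src.util`, which may differ in detail:

```python
# Sketch only: approximates the _collect_implementation dispatch idiom.
import importlib
from typing import Callable, List


def collect_implementation_sketch(package: str, *names: str) -> List[Callable]:
    backend = "numpy"  # stands in for the active MuyGPyS.config backend
    # Import the backend-specific sibling module, e.g.
    # MuyGPyS._src.optimize.scale.<backend>, and pull each function by name.
    module = importlib.import_module(f"{package}.{backend}")
    return [getattr(module, name) for name in names]
```

Adding `_analytic_scale_optim_unnormalized` to the collected list is what lets the new `DownSampleScale` (below) reuse the per-backend analytic machinery without the final normalization baked in.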
diff --git a/MuyGPyS/gp/hyperparameter/scale.py b/MuyGPyS/gp/hyperparameter/scale.py
index fbc339cd..93f389a3 100644
--- a/MuyGPyS/gp/hyperparameter/scale.py
+++ b/MuyGPyS/gp/hyperparameter/scale.py
@@ -10,8 +10,12 @@
 from typing import Callable, Tuple, Type
 
 import MuyGPyS._src.math as mm
+import MuyGPyS._src.math.numpy as np
 from MuyGPyS._src.util import _fullname
-from MuyGPyS._src.optimize.scale import _analytic_scale_optim
+from MuyGPyS._src.optimize.scale import (
+    _analytic_scale_optim,
+    _analytic_scale_optim_unnormalized,
+)
 
 
 class ScaleFn:
@@ -40,7 +44,9 @@ def __init__(
         _backend_outer: Callable = mm.outer,
         **kwargs,
     ):
-        self.val = _backend_ones(response_count)
+        self.val = _backend_ones(
+            self._check_positive_integer(response_count, "response")
+        )
         self._trained = False
 
         self._backend_ndarray = _backend_ndarray
@@ -48,6 +54,13 @@ def __init__(
         self._backend_farray = _backend_farray
         self._backend_outer = _backend_outer
 
+    def _check_positive_integer(self, count, name) -> int:
+        if not isinstance(count, int) or count <= 0:
+            raise ValueError(
+                f"{name} count must be a positive integer, not {count}"
+            )
+        return count
+
     def __str__(self, **kwargs):
         return f"{type(self).__name__}({self.val})"
 
@@ -169,6 +182,10 @@ class AnalyticScale(ScaleFn):
             The integer number of response dimensions.
     """
 
+    def __init__(self, _backend_fn: Callable = _analytic_scale_optim, **kwargs):
+        super().__init__(**kwargs)
+        self._fn = _backend_fn
+
     def get_opt_fn(self, muygps) -> Callable:
         """
         Get a function to optimize the value of the :math:`\\sigma^2` scale
@@ -203,6 +220,82 @@ def get_opt_fn(self, muygps) -> Callable:
         """
 
         def analytic_scale_opt_fn(K, nn_targets, *args, **kwargs):
-            return _analytic_scale_optim(muygps.noise.perturb(K), nn_targets)
+            return self._fn(muygps.noise.perturb(K), nn_targets)
 
         return analytic_scale_opt_fn
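The new `_backend_fn` hook makes the analytic optimizer injectable, which the backend correctness tests below rely on. For readers who want the math: a minimal numpy sketch of the quantity this optimizer estimates, assuming the usual MuyGPs tensor shapes (the per-backend implementations in `MuyGPyS._src.optimize.scale` remain the authority):

```python
# Sketch, not library code: the analytic sigma^2 estimate is the mean of the
# quadratic forms y^T pK^{-1} y over the batch and neighborhood dimensions.
import numpy as np


def analytic_scale_sketch(pK: np.ndarray, nn_targets: np.ndarray) -> np.ndarray:
    """pK: noise-perturbed kernels, (batch_count, nn_count, nn_count).
    nn_targets: neighborhood targets, (batch_count, nn_count, response_count).
    Returns one sigma^2 estimate per response dimension."""
    batch_count, nn_count, _ = pK.shape
    solved = np.linalg.solve(pK, nn_targets)  # batched pK^{-1} y
    quad = np.einsum("ijk,ijk->k", nn_targets, solved)  # summed quadratic forms
    # The "unnormalized" variant collected above omits this final division.
    return quad / (batch_count * nn_count)


# demo on well-conditioned toy data: recovers ~1.0 for unit-variance targets
rng = np.random.default_rng(0)
print(analytic_scale_sketch(np.tile(np.eye(5), (3, 1, 1)), rng.normal(size=(3, 5, 2))))
```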
+ """ + + def downsample_analytic_scale_opt_fn(K, nn_targets, *args, **kwargs): + batch_count, nn_count, _ = K.shape + if nn_count <= self._down_count: + raise ValueError( + f"bad attempt to downsample {self._down_count} elements " + f"from a set of only {nn_count} options" + ) + pK = muygps.noise.perturb(K) + scales = [] + for _ in range(self._iteration_count): + sampled_indices = np.random.choice( + np.arange(nn_count), + size=self._down_count, + replace=False, + ) + sampled_indices.sort() + + pK_down = pK[:, sampled_indices, :] + pK_down = pK_down[:, :, sampled_indices] + nn_targets_down = nn_targets[:, sampled_indices, :] + scales.append(self._fn(pK_down, nn_targets_down)) + + return mm.atleast_1d(np.median(scales, axis=0)) / ( + self._down_count * batch_count + ) + + return downsample_analytic_scale_opt_fn diff --git a/tests/backend/jax_correctness.py b/tests/backend/jax_correctness.py index c060dc48..3abc2077 100644 --- a/tests/backend/jax_correctness.py +++ b/tests/backend/jax_correctness.py @@ -165,6 +165,7 @@ def _make_muygps_rbf_n(cls): cls.noise, _backend_fn=homoscedastic_perturb_n ), scale=AnalyticScale( + _backend_fn=analytic_scale_optim_n, _backend_ones=np.ones, _backend_ndarray=np.ndarray, _backend_ftype=np.ftype, @@ -191,6 +192,7 @@ def _make_muygps_n( ), noise=noise, scale=AnalyticScale( + _backend_fn=analytic_scale_optim_n, _backend_ones=np.ones, _backend_ndarray=np.ndarray, _backend_ftype=np.ftype, @@ -262,6 +264,7 @@ def _make_muygps_rbf_j(cls): cls.noise, _backend_fn=homoscedastic_perturb_j ), scale=AnalyticScale( + _backend_fn=analytic_scale_optim_j, _backend_ones=jnp.ones, _backend_ndarray=jnp.ndarray, _backend_ftype=jnp.ftype, @@ -288,6 +291,7 @@ def _make_muygps_j( ), noise=noise, scale=AnalyticScale( + _backend_fn=analytic_scale_optim_j, _backend_ones=jnp.ones, _backend_ndarray=jnp.ndarray, _backend_ftype=jnp.ftype, @@ -850,10 +854,10 @@ def test_diagonal_variance_heteroscedastic(self): def test_scale_optim(self): self.assertTrue( allclose_inv( - analytic_scale_optim_n( + self.muygps_gen_n.scale.get_opt_fn(self.muygps_gen_n)( self.homoscedastic_K_n, self.batch_nn_targets_n ), - analytic_scale_optim_j( + self.muygps_gen_j.scale.get_opt_fn(self.muygps_gen_j)( self.homoscedastic_K_j, self.batch_nn_targets_j ), ) @@ -862,12 +866,12 @@ def test_scale_optim(self): def test_scale_optim_heteroscedastic(self): self.assertTrue( allclose_inv( - analytic_scale_optim_n( - self.heteroscedastic_K_n, self.batch_nn_targets_n - ), - analytic_scale_optim_j( - self.heteroscedastic_K_j, self.batch_nn_targets_j - ), + self.muygps_heteroscedastic_n.scale.get_opt_fn( + self.muygps_heteroscedastic_n + )(self.heteroscedastic_K_n, self.batch_nn_targets_n), + self.muygps_heteroscedastic_j.scale.get_opt_fn( + self.muygps_heteroscedastic_j + )(self.heteroscedastic_K_j, self.batch_nn_targets_j), ) ) diff --git a/tests/backend/torch_correctness.py b/tests/backend/torch_correctness.py index e54d412d..adc0d0e5 100644 --- a/tests/backend/torch_correctness.py +++ b/tests/backend/torch_correctness.py @@ -142,6 +142,7 @@ def _make_muygps_rbf_n(cls): cls.noise, _backend_fn=homoscedastic_perturb_n ), scale=AnalyticScale( + _backend_fn=analytic_scale_optim_n, _backend_ones=np.ones, _backend_ndarray=np.ndarray, _backend_ftype=np.ftype, @@ -166,6 +167,7 @@ def _make_muygps_n(cls, smoothness, noise, deformation): ), noise=noise, scale=AnalyticScale( + _backend_fn=analytic_scale_optim_n, _backend_ones=np.ones, _backend_ndarray=np.ndarray, _backend_ftype=np.ftype, @@ -233,6 +235,7 @@ def 
diff --git a/tests/backend/jax_correctness.py b/tests/backend/jax_correctness.py
index c060dc48..3abc2077 100644
--- a/tests/backend/jax_correctness.py
+++ b/tests/backend/jax_correctness.py
@@ -165,6 +165,7 @@ def _make_muygps_rbf_n(cls):
                 cls.noise, _backend_fn=homoscedastic_perturb_n
             ),
             scale=AnalyticScale(
+                _backend_fn=analytic_scale_optim_n,
                 _backend_ones=np.ones,
                 _backend_ndarray=np.ndarray,
                 _backend_ftype=np.ftype,
@@ -191,6 +192,7 @@ def _make_muygps_n(
             ),
             noise=noise,
             scale=AnalyticScale(
+                _backend_fn=analytic_scale_optim_n,
                 _backend_ones=np.ones,
                 _backend_ndarray=np.ndarray,
                 _backend_ftype=np.ftype,
@@ -262,6 +264,7 @@ def _make_muygps_rbf_j(cls):
                 cls.noise, _backend_fn=homoscedastic_perturb_j
             ),
             scale=AnalyticScale(
+                _backend_fn=analytic_scale_optim_j,
                 _backend_ones=jnp.ones,
                 _backend_ndarray=jnp.ndarray,
                 _backend_ftype=jnp.ftype,
@@ -288,6 +291,7 @@ def _make_muygps_j(
             ),
             noise=noise,
             scale=AnalyticScale(
+                _backend_fn=analytic_scale_optim_j,
                 _backend_ones=jnp.ones,
                 _backend_ndarray=jnp.ndarray,
                 _backend_ftype=jnp.ftype,
@@ -850,10 +854,10 @@ def test_diagonal_variance_heteroscedastic(self):
     def test_scale_optim(self):
         self.assertTrue(
             allclose_inv(
-                analytic_scale_optim_n(
+                self.muygps_gen_n.scale.get_opt_fn(self.muygps_gen_n)(
                     self.homoscedastic_K_n, self.batch_nn_targets_n
                 ),
-                analytic_scale_optim_j(
+                self.muygps_gen_j.scale.get_opt_fn(self.muygps_gen_j)(
                     self.homoscedastic_K_j, self.batch_nn_targets_j
                 ),
             )
@@ -862,12 +866,12 @@ def test_scale_optim(self):
     def test_scale_optim_heteroscedastic(self):
         self.assertTrue(
             allclose_inv(
-                analytic_scale_optim_n(
-                    self.heteroscedastic_K_n, self.batch_nn_targets_n
-                ),
-                analytic_scale_optim_j(
-                    self.heteroscedastic_K_j, self.batch_nn_targets_j
-                ),
+                self.muygps_heteroscedastic_n.scale.get_opt_fn(
+                    self.muygps_heteroscedastic_n
+                )(self.heteroscedastic_K_n, self.batch_nn_targets_n),
+                self.muygps_heteroscedastic_j.scale.get_opt_fn(
+                    self.muygps_heteroscedastic_j
+                )(self.heteroscedastic_K_j, self.batch_nn_targets_j),
             )
         )
diff --git a/tests/backend/torch_correctness.py b/tests/backend/torch_correctness.py
index e54d412d..adc0d0e5 100644
--- a/tests/backend/torch_correctness.py
+++ b/tests/backend/torch_correctness.py
@@ -142,6 +142,7 @@ def _make_muygps_rbf_n(cls):
                 cls.noise, _backend_fn=homoscedastic_perturb_n
             ),
             scale=AnalyticScale(
+                _backend_fn=analytic_scale_optim_n,
                 _backend_ones=np.ones,
                 _backend_ndarray=np.ndarray,
                 _backend_ftype=np.ftype,
@@ -166,6 +167,7 @@ def _make_muygps_n(cls, smoothness, noise, deformation):
             ),
             noise=noise,
             scale=AnalyticScale(
+                _backend_fn=analytic_scale_optim_n,
                 _backend_ones=np.ones,
                 _backend_ndarray=np.ndarray,
                 _backend_ftype=np.ftype,
@@ -233,6 +235,7 @@ def _make_muygps_rbf_t(cls):
                 cls.noise, _backend_fn=homoscedastic_perturb_t
             ),
             scale=AnalyticScale(
+                _backend_fn=analytic_scale_optim_t,
                 _backend_ones=torch.ones,
                 _backend_ndarray=torch.ndarray,
                 _backend_ftype=torch.ftype,
@@ -257,6 +260,7 @@ def _make_muygps_t(cls, smoothness, noise, deformation):
             ),
             noise=noise,
             scale=AnalyticScale(
+                _backend_fn=analytic_scale_optim_t,
                 _backend_ones=torch.ones,
                 _backend_ndarray=torch.ndarray,
                 _backend_ftype=torch.ftype,
@@ -649,12 +653,12 @@ def test_heteroscedastic_noise(self):
     def test_posterior_mean(self):
         self.assertTrue(
             _allclose(
-                muygps_posterior_mean_n(
+                self.muygps_05_n.posterior_mean(
                     self.homoscedastic_K_n,
                     self.Kcross_n,
                     self.batch_nn_targets_n,
                 ),
-                muygps_posterior_mean_t(
+                self.muygps_05_t.posterior_mean(
                     self.homoscedastic_K_t,
                     self.Kcross_t,
                     self.batch_nn_targets_t,
@@ -665,12 +669,12 @@ def test_posterior_mean(self):
     def test_posterior_mean_heteroscedastic(self):
         self.assertTrue(
             _allclose(
-                muygps_posterior_mean_n(
+                self.muygps_heteroscedastic_n.posterior_mean(
                     self.heteroscedastic_K_n,
                     self.Kcross_n,
                     self.batch_nn_targets_n,
                 ),
-                muygps_posterior_mean_t(
+                self.muygps_heteroscedastic_t.posterior_mean(
                     self.heteroscedastic_K_t,
                     self.Kcross_t,
                     self.batch_nn_targets_t,
@@ -681,10 +685,10 @@ def test_posterior_mean_heteroscedastic(self):
     def test_diagonal_variance(self):
         self.assertTrue(
             np.allclose(
-                muygps_diagonal_variance_n(
+                self.muygps_05_n.posterior_variance(
                     self.homoscedastic_K_n, self.Kcross_n
                 ),
-                muygps_diagonal_variance_t(
+                self.muygps_05_t.posterior_variance(
                     self.homoscedastic_K_t, self.Kcross_t
                 ),
             )
@@ -693,10 +697,10 @@ def test_diagonal_variance(self):
     def test_diagonal_variance_heteroscedastic(self):
         self.assertTrue(
             np.allclose(
-                muygps_diagonal_variance_n(
+                self.muygps_heteroscedastic_n.posterior_variance(
                     self.heteroscedastic_K_n, self.Kcross_n
                 ),
-                muygps_diagonal_variance_t(
+                self.muygps_heteroscedastic_t.posterior_variance(
                     self.heteroscedastic_K_t, self.Kcross_t
                 ),
             )
@@ -705,10 +709,10 @@ def test_diagonal_variance_heteroscedastic(self):
     def test_scale_optim(self):
         self.assertTrue(
             np.allclose(
-                analytic_scale_optim_n(
+                self.muygps_rbf_n.scale.get_opt_fn(self.muygps_rbf_n)(
                     self.homoscedastic_K_n, self.batch_nn_targets_n
                 ),
-                analytic_scale_optim_t(
+                self.muygps_rbf_t.scale.get_opt_fn(self.muygps_rbf_t)(
                     self.homoscedastic_K_t, self.batch_nn_targets_t
                 ),
             )
@@ -717,12 +721,12 @@ def test_scale_optim(self):
     def test_scale_optim_heteroscedastic(self):
         self.assertTrue(
             np.allclose(
-                analytic_scale_optim_n(
-                    self.heteroscedastic_K_n, self.batch_nn_targets_n
-                ),
-                analytic_scale_optim_t(
-                    self.heteroscedastic_K_t, self.batch_nn_targets_t
-                ),
+                self.muygps_heteroscedastic_n.scale.get_opt_fn(
+                    self.muygps_heteroscedastic_n
+                )(self.heteroscedastic_K_n, self.batch_nn_targets_n),
+                self.muygps_heteroscedastic_t.scale.get_opt_fn(
+                    self.muygps_heteroscedastic_t
+                )(self.heteroscedastic_K_t, self.batch_nn_targets_t),
             )
         )
diff --git a/tests/optimize.py b/tests/optimize.py
index 1acdd10b..e6d24051 100644
--- a/tests/optimize.py
+++ b/tests/optimize.py
@@ -7,17 +7,13 @@
 from absl.testing import parameterized
 
 import MuyGPyS._src.math as mm
-from MuyGPyS._src.gp.tensors import _pairwise_differences
 from MuyGPyS import config
-from MuyGPyS._src.mpi_utils import _consistent_chunk_tensor
-from MuyGPyS._test.gp import get_analytic_scale
 from MuyGPyS._test.optimize import BenchmarkTestCase
 from MuyGPyS._test.utils import (
     _advanced_opt_fn_and_kwarg_options,
     _basic_opt_fn_and_kwarg_options,
     _basic_nn_kwarg_options,
-    _sq_rel_err,
 )
 from MuyGPyS.gp import MuyGPS
@@ -25,15 +21,8 @@
 from MuyGPyS.gp.hyperparameter import AnalyticScale, FixedScale, ScalarParam
 from MuyGPyS.gp.kernels import Matern
 from MuyGPyS.gp.noise import HomoscedasticNoise
-from MuyGPyS.gp.tensors import pairwise_tensor
 from MuyGPyS.neighbors import NN_Wrapper
-from MuyGPyS.optimize.batch import sample_batch
-from MuyGPyS.optimize.loss import (
-    lool_fn,
-    mse_fn,
-    pseudo_huber_fn,
-    looph_fn,
-)
+from MuyGPyS.optimize.loss import lool_fn, mse_fn, pseudo_huber_fn, looph_fn
 
 if config.state.backend != "numpy":
     raise ValueError("optimize.py only supports the numpy backend at this time")
@@ -71,82 +60,6 @@ def test_types(self):
         )
 
 
-class ScaleTest(BenchmarkTestCase):
-    @classmethod
-    def setUpClass(cls):
-        super(ScaleTest, cls).setUpClass()
-
-    def test_scale(self):
-        mrse = 0.0
-        pairwise_diffs = _pairwise_differences(self.train_features)
-        K = self.gp.kernel(pairwise_diffs) + self.gp.noise() * mm.eye(
-            self.feature_count
-        )
-        for i in range(self.its):
-            ss = get_analytic_scale(K, self.train_features)
-            mrse += _sq_rel_err(self.gp.scale(), ss)
-
-        mrse /= self.its
-        print(f"optimizes with mean relative squared error {mrse}")
-        self.assertLessEqual(mrse, self.scale_tol)
-
-
-class ScaleOptimTest(BenchmarkTestCase):
-    @classmethod
-    def setUpClass(cls):
-        super(ScaleOptimTest, cls).setUpClass()
-
-    @parameterized.parameters(
-        (
-            (b, n, nn_kwargs)
-            for b in [250]
-            for n in [20]
-            for nn_kwargs in [_basic_nn_kwarg_options[0]]
-        )
-    )
-    def test_scale_optim(
-        self,
-        batch_count,
-        nn_count,
-        nn_kwargs,
-    ):
-        mrse = 0.0
-
-        nbrs_lookup = NN_Wrapper(self.train_features, nn_count, **nn_kwargs)
-
-        for i in range(self.its):
-            muygps = MuyGPS(
-                kernel=Matern(
-                    smoothness=ScalarParam(self.params["smoothness"]()),
-                    deformation=Isotropy(
-                        metric=l2,
-                        length_scale=ScalarParam(self.params["length_scale"]()),
-                    ),
-                ),
-                noise=HomoscedasticNoise(self.params["noise"]()),
-                scale=AnalyticScale(),
-            )
-            _, batch_nn_indices = sample_batch(
-                nbrs_lookup, batch_count, self.train_count
-            )
-            batch_pairwise_diffs = pairwise_tensor(
-                self.train_features, batch_nn_indices
-            )
-            batch_nn_targets = _consistent_chunk_tensor(
-                self.train_responses[i, batch_nn_indices, :]
-            )
-
-            muygps = muygps.optimize_scale(
-                batch_pairwise_diffs, batch_nn_targets
-            )
-            estimate = muygps.scale()[0]
-
-            mrse += _sq_rel_err(self.gp.scale(), estimate)
-        mrse /= self.its
-        print(f"optimizes with mean relative squared error {mrse}")
-        self.assertLessEqual(mrse, self.scale_tol)
-
-
 class SmoothnessTest(BenchmarkTestCase):
     @classmethod
     def setUpClass(cls):
diff --git a/tests/scale_opt.py b/tests/scale_opt.py
new file mode 100644
index 00000000..b9ae83b6
--- /dev/null
+++ b/tests/scale_opt.py
@@ -0,0 +1,174 @@
+# Copyright 2021-2023 Lawrence Livermore National Security, LLC and other
+# MuyGPyS Project Developers. See the top-level COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: MIT
+
+from absl.testing import absltest
+from absl.testing import parameterized
+
+import MuyGPyS._src.math as mm
+from MuyGPyS._src.gp.tensors import _pairwise_differences
+
+from MuyGPyS import config
+from MuyGPyS._src.mpi_utils import _consistent_chunk_tensor
+from MuyGPyS._test.gp import get_analytic_scale
+from MuyGPyS._test.optimize import BenchmarkTestCase
+from MuyGPyS._test.utils import _basic_nn_kwarg_options, _sq_rel_err
+from MuyGPyS.gp import MuyGPS
+
+from MuyGPyS.gp.deformation import Isotropy, l2
+from MuyGPyS.gp.hyperparameter import (
+    AnalyticScale,
+    DownSampleScale,
+    ScalarParam,
+)
+from MuyGPyS.gp.kernels import Matern
+from MuyGPyS.gp.noise import HomoscedasticNoise
+from MuyGPyS.gp.tensors import pairwise_tensor
+from MuyGPyS.neighbors import NN_Wrapper
+from MuyGPyS.optimize.batch import sample_batch
+
+if config.state.backend != "numpy":
+    raise ValueError(
+        "scale_opt.py only supports the numpy backend at this time"
+    )
+
+
+class ScaleTest(BenchmarkTestCase):
+    @classmethod
+    def setUpClass(cls):
+        super(ScaleTest, cls).setUpClass()
+
+    def test_scale(self):
+        mrse = 0.0
+        pairwise_diffs = _pairwise_differences(self.train_features)
+        K = self.gp.kernel(pairwise_diffs) + self.gp.noise() * mm.eye(
+            self.feature_count
+        )
+        for i in range(self.its):
+            ss = get_analytic_scale(K, self.train_features)
+            mrse += _sq_rel_err(self.gp.scale(), ss)
+
+        mrse /= self.its
+        print(f"optimizes with mean relative squared error {mrse}")
+        self.assertLessEqual(mrse, self.scale_tol)
+
+
+class AnalyticOptimTest(BenchmarkTestCase):
+    @classmethod
+    def setUpClass(cls):
+        super(AnalyticOptimTest, cls).setUpClass()
+
+    @parameterized.parameters(
+        (
+            (b, n, nn_kwargs)
+            for b in [250]
+            for n in [20]
+            for nn_kwargs in [_basic_nn_kwarg_options[0]]
+        )
+    )
+    def test_scale_optim(
+        self,
+        batch_count,
+        nn_count,
+        nn_kwargs,
+    ):
+        mrse = 0.0
+
+        nbrs_lookup = NN_Wrapper(self.train_features, nn_count, **nn_kwargs)
+
+        for i in range(self.its):
+            muygps = MuyGPS(
+                kernel=Matern(
+                    smoothness=ScalarParam(self.params["smoothness"]()),
+                    deformation=Isotropy(
+                        metric=l2,
+                        length_scale=ScalarParam(self.params["length_scale"]()),
+                    ),
+                ),
+                noise=HomoscedasticNoise(self.params["noise"]()),
+                scale=AnalyticScale(),
+            )
+            _, batch_nn_indices = sample_batch(
+                nbrs_lookup, batch_count, self.train_count
+            )
+            batch_pairwise_diffs = pairwise_tensor(
+                self.train_features, batch_nn_indices
+            )
+            batch_nn_targets = _consistent_chunk_tensor(
+                self.train_responses[i, batch_nn_indices, :]
+            )
+
+            muygps = muygps.optimize_scale(
+                batch_pairwise_diffs, batch_nn_targets
+            )
+            estimate = muygps.scale()[0]
+
+            mrse += _sq_rel_err(self.gp.scale(), estimate)
+        mrse /= self.its
+        print(f"optimizes with mean relative squared error {mrse}")
+        self.assertLessEqual(mrse, self.scale_tol)
+
+
+class DownSampleOptimTest(BenchmarkTestCase):
+    @classmethod
+    def setUpClass(cls):
+        super(DownSampleOptimTest, cls).setUpClass()
+
+    @parameterized.parameters(
+        (
+            (b, n, d, i, nn_kwargs)
+            for b in [250]
+            for n in [20]
+            for d in [15]
+            for i in [10]
+            for nn_kwargs in [_basic_nn_kwarg_options[0]]
+        )
+    )
+    def test_scale_optim(
+        self,
+        batch_count,
+        nn_count,
+        down_count,
+        iteration_count,
+        nn_kwargs,
+    ):
+        mrse = 0.0
+
+        nbrs_lookup = NN_Wrapper(self.train_features, nn_count, **nn_kwargs)
+
+        for i in range(self.its):
+            muygps = MuyGPS(
+                kernel=Matern(
+                    smoothness=ScalarParam(self.params["smoothness"]()),
+                    deformation=Isotropy(
+                        metric=l2,
+                        length_scale=ScalarParam(self.params["length_scale"]()),
+                    ),
+                ),
+                noise=HomoscedasticNoise(self.params["noise"]()),
+                scale=DownSampleScale(
+                    down_count=down_count, iteration_count=iteration_count
+                ),
+            )
+            _, batch_nn_indices = sample_batch(
+                nbrs_lookup, batch_count, self.train_count
+            )
+            batch_pairwise_diffs = pairwise_tensor(
+                self.train_features, batch_nn_indices
+            )
+            batch_nn_targets = _consistent_chunk_tensor(
+                self.train_responses[i, batch_nn_indices, :]
+            )
+
+            muygps = muygps.optimize_scale(
+                batch_pairwise_diffs, batch_nn_targets
+            )
+            estimate = muygps.scale()[0]
+
+            mrse += _sq_rel_err(self.gp.scale(), estimate)
+        mrse /= self.its
+        print(f"optimizes with mean relative squared error {mrse}")
+        self.assertLessEqual(mrse, self.scale_tol)
+
+
+if __name__ == "__main__":
+    absltest.main()
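One subtlety in `DownSampleScale` worth calling out: it medians the *unnormalized* per-iteration estimates and divides by `down_count * batch_count` once at the end, so the median is taken in the raw quadratic-form domain before normalization. A self-contained toy check of that arithmetic, in plain numpy with invented shapes (not library code):

```python
# Toy verification of DownSampleScale's median-then-normalize arithmetic.
import numpy as np

rng = np.random.default_rng(0)
batch_count, nn_count, down_count, iteration_count = 4, 20, 15, 10
pK = np.tile(np.eye(nn_count), (batch_count, 1, 1))  # trivially conditioned kernels
y = rng.normal(size=(batch_count, nn_count, 1))      # unit-variance targets

estimates = []
for _ in range(iteration_count):
    # sample down_count of the nn_count neighbors, without replacement
    idx = np.sort(rng.choice(nn_count, size=down_count, replace=False))
    pK_down = pK[:, idx][:, :, idx]
    y_down = y[:, idx, :]
    solved = np.linalg.solve(pK_down, y_down)
    # unnormalized analytic estimate: summed quadratic forms y^T pK^{-1} y
    estimates.append(np.einsum("ijk,ijk->k", y_down, solved))

scale = np.median(estimates, axis=0) / (down_count * batch_count)
print(scale)  # ~1.0, the true variance of y
```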