Remove "dataset" return_type option from kernel transformers (#752)
* Remove "dataset" `return_type` option from kernel transformers

* Drop ma_map_reuse tests

* Update test_meta_kernel.py

* Update test_meta_kernel.py

* Update nimare/meta/kernel.py

Co-authored-by: Taylor Salo <tsalo90@gmail.com>

* Update nimare/meta/kernel.py

Co-authored-by: Taylor Salo <tsalo90@gmail.com>

* Add versionchanged to child classes

Co-authored-by: Taylor Salo <tsalo90@gmail.com>
JulioAPeraza and tsalo authored Jan 13, 2023
1 parent 97ab7fa commit 282f197
Showing 5 changed files with 51 additions and 176 deletions.
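For downstream code that relied on return_type="dataset", the same result can be recovered with return_type="image" plus a manual save-and-register loop, which is exactly what the updated test in this commit does. A minimal migration sketch, assuming a Dataset named `dset` with coordinates and a valid basepath (the name `dset` is a placeholder, not part of the API):

import os

from nimare.meta import kernel

# Assumes `dset` is a nimare.dataset.Dataset with a valid basepath
# (set it with dset.update_path() if needed).
kern = kernel.MKDAKernel(r=10, value=1)
kern._infer_names()  # populates kern.filename_pattern and kern.image_type

imgs = kern.transform(dset, return_type="image")
for i_img, img in enumerate(imgs):
    id_ = dset.ids[i_img]
    out_file = os.path.join(dset.basepath, kern.filename_pattern.format(id=id_))
    img.to_filename(out_file)  # save the MA map to disk
    # Register the file in the Dataset, mirroring the removed "dataset" behavior.
    dset.images.loc[dset.images["id"] == id_, kern.image_type] = out_file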
51 changes: 7 additions & 44 deletions nimare/meta/cbma/base.py
@@ -1,7 +1,6 @@
"""CBMA methods from the ALE and MKDA families."""
import logging
from abc import abstractmethod
from hashlib import md5

import nibabel as nib
import numpy as np
@@ -92,8 +91,7 @@ def _preprocess_input(self, dataset):
----------
dataset : :obj:`~nimare.dataset.Dataset`
In this method, the Dataset is used to (1) select the appropriate mask image,
(2) identify any pre-generated MA maps stored in its images attribute,
and (3) extract sample size metadata and place it into the coordinates input.
and (2) extract sample size metadata and place it into the coordinates input.
Attributes
----------
@@ -111,17 +109,6 @@ def _preprocess_input(self, dataset):

for name, (type_, _) in self._required_inputs.items():
if type_ == "coordinates":
# Try to load existing MA maps
if hasattr(self, "kernel_transformer"):
self.kernel_transformer._infer_names(affine=md5(mask_img.affine).hexdigest())
if self.kernel_transformer.image_type in dataset.images.columns:
files = dataset.get_images(
ids=self.inputs_["id"],
imtype=self.kernel_transformer.image_type,
)
if all(f is not None for f in files):
self.inputs_["ma_maps"] = files

# Calculate IJK matrix indices for target mask
# Mask space is assumed to be the same as the Dataset's space
# These indices are used directly by any KernelTransformer
@@ -220,37 +207,13 @@ def _collect_ma_maps(self, coords_key="coordinates", maps_key="ma_maps"):
Return a 4D sparse array of shape
(n_studies, mask.shape) with MA maps.
"""
if maps_key in self.inputs_.keys():
LGR.debug(f"Loading pre-generated MA maps ({maps_key}).")
all_exp = []
all_coords = []
all_data = []
for i_exp, img in enumerate(self.inputs_[maps_key]):
img_data = nib.load(img).get_fdata()
nonzero_idx = np.where(img_data != 0)

all_exp.append(np.full(nonzero_idx[0].shape[0], i_exp))
all_coords.append(np.vstack(nonzero_idx))
all_data.append(img_data[nonzero_idx])

n_studies = len(self.inputs_[maps_key])
shape = img_data.shape
kernel_shape = (n_studies,) + shape

exp = np.hstack(all_exp)
coords = np.vstack((exp.flatten(), np.hstack(all_coords)))
data = np.hstack(all_data).flatten()

ma_maps = sparse.COO(coords, data, shape=kernel_shape)
LGR.debug(f"Generating MA maps from coordinates ({coords_key}).")

else:
LGR.debug(f"Generating MA maps from coordinates ({coords_key}).")

ma_maps = self.kernel_transformer.transform(
self.inputs_[coords_key],
masker=self.masker,
return_type="sparse",
)
ma_maps = self.kernel_transformer.transform(
self.inputs_[coords_key],
masker=self.masker,
return_type="sparse",
)

return ma_maps

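With the fallback removed, this method always builds its MA maps through the kernel transformer, returning a 4D sparse.COO array of shape (n_studies, *mask.shape). A short sketch of consuming that return type directly, assuming placeholder `coords` (a coordinates DataFrame) and `masker` (a nilearn-style masker), as accepted by KernelTransformer.transform:

import numpy as np

from nimare.meta import kernel

kern = kernel.ALEKernel()
ma_maps = kern.transform(coords, masker=masker, return_type="sparse")

print(ma_maps.shape)  # (n_studies, i, j, k): one MA map per experiment
first_map = ma_maps[0].todense()  # densify a single study's map when needed
print(np.count_nonzero(first_map))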
95 changes: 29 additions & 66 deletions nimare/meta/kernel.py
@@ -7,24 +7,25 @@
from __future__ import division

import logging
import os
from hashlib import md5

import nibabel as nib
import numpy as np
import pandas as pd
import sparse

from nimare.base import NiMAREBase
from nimare.meta.utils import compute_ale_ma, compute_kda_ma, get_ale_kernel
from nimare.utils import _add_metadata_to_dataframe, _safe_transform, mm2vox
from nimare.utils import _add_metadata_to_dataframe, mm2vox

LGR = logging.getLogger(__name__)


class KernelTransformer(NiMAREBase):
"""Base class for modeled activation-generating methods in :mod:`~nimare.meta.kernel`.
.. versionchanged:: 0.0.13
- Remove "dataset" `return_type` option.
Coordinate-based meta-analyses leverage coordinates reported in
neuroimaging papers to simulate the thresholded statistical maps from the
original analyses. This generally involves convolving each coordinate with
@@ -39,7 +40,7 @@ class KernelTransformer(NiMAREBase):
"""

def _infer_names(self, **kwargs):
"""Determine filename pattern and image type for files created with this transformer.
"""Determine filename pattern and image type.
The parameters used to construct the filenames come from the transformer's
parameters (attributes saved in ``__init__()``).
@@ -53,7 +54,7 @@ def _infer_names(self, **kwargs):
Attributes
----------
filename_pattern : str
Filename pattern for images that will be saved by the transformer.
Filename pattern for images.
image_type : str
Name of the corresponding column in the Dataset.images DataFrame.
"""
@@ -81,9 +82,9 @@ def transform(self, dataset, masker=None, return_type="image"):
Mask to apply to MA maps. Required if ``dataset`` is a DataFrame.
If None (and ``dataset`` is a Dataset), the Dataset's masker attribute will be used.
Default is None.
return_type : {'sparse', 'array', 'image', 'dataset'}, optional
Whether to return a numpy array ('array'), a list of niimgs ('image'),
or a Dataset with MA images saved as files ('dataset').
return_type : {'sparse', 'array', 'image'}, optional
Whether to return a sparse matrix ('sparse'), a numpy array ('array'),
or a list of niimgs ('image').
Default is 'image'.
Returns
@@ -97,29 +98,24 @@ def transform(self, dataset, masker=None, return_type="image"):
contrast and V is voxel.
If return_type is 'image', a list of modeled activation images
(one for each of the Contrasts in the input dataset).
If return_type is 'dataset', a new Dataset object with modeled
activation images saved to files and referenced in the
Dataset.images attribute.
Attributes
----------
filename_pattern : str
Filename pattern for MA maps that will be saved by the transformer.
Filename pattern for MA maps. If :meth:`_infer_names` is executed.
image_type : str
Name of the corresponding column in the Dataset.images DataFrame.
If :meth:`_infer_names` is executed.
"""
if return_type not in ("sparse", "array", "image", "dataset"):
raise ValueError('Argument "return_type" must be "image", "array", or "dataset".')
if return_type not in ("sparse", "array", "image"):
raise ValueError('Argument "return_type" must be "image", "array", or "sparse".')

if isinstance(dataset, pd.DataFrame):
assert (
masker is not None
), "Argument 'masker' must be provided if dataset is a DataFrame."
mask = masker.mask_img
coordinates = dataset
assert (
return_type != "dataset"
), "Input dataset must be a Dataset if return_type='dataset'."

# Calculate IJK. Must assume that the masker is in same space,
# but has different affine, from original IJK.
@@ -129,24 +125,6 @@ def transform(self, dataset, masker=None, return_type="image"):
mask = masker.mask_img
coordinates = dataset.coordinates.copy()

# Determine MA map filenames. Must happen after parameters are set.
self._infer_names(affine=md5(mask.affine).hexdigest())

# Check for existing MA maps
# Use coordinates to get IDs instead of Dataset.ids bc of possible
# mismatch between full Dataset and contrasts with coordinates.
if self.image_type in dataset.images.columns:
files = dataset.get_images(ids=coordinates["id"].unique(), imtype=self.image_type)
if all(f is not None for f in files):
LGR.debug("Files already exist. Using them.")
if return_type == "array":
masked_data = _safe_transform(files, masker)
return masked_data
elif return_type == "image":
return [nib.load(f) for f in files]
elif return_type == "dataset":
return dataset.copy()

# Calculate IJK
if not np.array_equal(mask.affine, dataset.masker.mask_img.affine):
LGR.warning("Mask affine does not match Dataset affine. Assuming same space.")
@@ -170,36 +148,22 @@ def transform(self, dataset, masker=None, return_type="image"):
filter_func=np.mean,
)

# Generate the MA maps if they weren't already available as images
if return_type == "array":
mask_data = mask.get_fdata().astype(bool)
elif return_type == "image":
dtype = type(self.value) if hasattr(self, "value") else float
mask_data = mask.get_fdata().astype(dtype)
elif return_type == "dataset":
if dataset.basepath is None:
raise ValueError(
"Dataset output path is not set. Set the path with Dataset.update_path()."
)
elif not os.path.isdir(dataset.basepath):
raise ValueError(
"Output directory does not exist. Set the path to an existing folder with "
"Dataset.update_path()."
)
dataset = dataset.copy()

# Generate the MA maps
transformed_maps = self._transform(mask, coordinates)

if return_type == "sparse":
return transformed_maps[0]

imgs = []
# Loop over exp ids since sparse._coo.core.COO is not iterable
for i_exp, id_ in enumerate(transformed_maps[1]):
if isinstance(transformed_maps[0][i_exp], sparse._coo.core.COO):
# This step is slow, but it is here just in case user want a
# return_type = "array", "image", or "dataset"
kernel_data = transformed_maps[0][i_exp].todense()
for i_exp, _ in enumerate(transformed_maps[1]):
kernel_data = transformed_maps[0][i_exp].todense()

if return_type == "array":
img = kernel_data[mask_data]
Expand All @@ -208,26 +172,13 @@ def transform(self, dataset, masker=None, return_type="image"):
kernel_data *= mask_data
img = nib.Nifti1Image(kernel_data, mask.affine)
imgs.append(img)
elif return_type == "dataset":
img = nib.Nifti1Image(kernel_data, mask.affine)
out_file = os.path.join(dataset.basepath, self.filename_pattern.format(id=id_))
img.to_filename(out_file)
dataset.images.loc[dataset.images["id"] == id_, self.image_type] = out_file

del kernel_data, transformed_maps

if return_type == "array":
return np.vstack(imgs)
elif return_type == "image":
return imgs
elif return_type == "dataset":
# Replace NaNs with Nones
dataset.images[self.image_type] = dataset.images[self.image_type].where(
dataset.images[self.image_type].notnull(), None
)
# Infer relative path
dataset.images = dataset.images
return dataset

def _transform(self, mask, coordinates):
"""Apply the kernel's unique transformer.
@@ -264,6 +215,10 @@ class ALEKernel(KernelTransformer):
will be determined on a study-wise basis based on the sample sizes available in the input,
via the method described in :footcite:t:`eickhoff2012activation`.
.. versionchanged:: 0.0.13
- Remove "dataset" `return_type` option.
.. versionchanged:: 0.0.12
* Remove low-memory option in favor of sparse arrays for kernel transformers.
@@ -326,6 +281,10 @@ def _transform(self, mask, coordinates):
class KDAKernel(KernelTransformer):
"""Generate KDA modeled activation images from coordinates.
.. versionchanged:: 0.0.13
- Remove "dataset" `return_type` option.
.. versionchanged:: 0.0.12
* Remove low-memory option in favor of sparse arrays for kernel transformers.
@@ -363,6 +322,10 @@ def _transform(self, mask, coordinates):
class MKDAKernel(KDAKernel):
"""Generate MKDA modeled activation images from coordinates.
.. versionchanged:: 0.0.13
- Remove "dataset" `return_type` option.
.. versionchanged:: 0.0.12
* Remove low-memory option in favor of sparse arrays for kernel transformers.
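All three kernel classes inherit the tightened validation shown above, so the removed option now fails fast instead of silently writing files. An illustrative check, again assuming placeholder `coords` and `masker`:

from nimare.meta import kernel

kern = kernel.MKDAKernel(r=10)

try:
    kern.transform(coords, masker=masker, return_type="dataset")
except ValueError as err:
    print(err)  # Argument "return_type" must be "image", "array", or "sparse".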
14 changes: 13 additions & 1 deletion nimare/tests/test_decode_continuous.py
@@ -2,6 +2,8 @@
Tests for nimare.decode.continuous.gclda_decode_map are in test_annotate_gclda.
"""
import os

import pandas as pd
import pytest

@@ -29,6 +31,7 @@ def test_CorrelationDistributionDecoder_smoke(testdata_laird, tmp_path_factory):
tmpdir = tmp_path_factory.mktemp("test_CorrelationDistributionDecoder")

testdata_laird = testdata_laird.copy()
dset = testdata_laird.copy()
features = testdata_laird.get_labels(ids=testdata_laird.ids[0])[:5]

decoder = continuous.CorrelationDistributionDecoder(features=features)
@@ -42,7 +45,16 @@ def test_CorrelationDistributionDecoder_smoke(testdata_laird, tmp_path_factory):

# Then let's make some images to decode
kern = kernel.MKDAKernel(r=10, value=1)
dset = kern.transform(testdata_laird, return_type="dataset")
kern._infer_names() # Determine MA map filenames

imgs = kern.transform(testdata_laird, return_type="image")
for i_img, img in enumerate(imgs):
id_ = testdata_laird.ids[i_img]
out_file = os.path.join(testdata_laird.basepath, kern.filename_pattern.format(id=id_))

# Add file names to dset.images DataFrame
img.to_filename(out_file)
dset.images.loc[testdata_laird.images["id"] == id_, kern.image_type] = out_file

# And now we have images we can use for decoding!
decoder = continuous.CorrelationDistributionDecoder(

