From 9dde5b916ef70fc4854bad3ce95d2a4851c0c417 Mon Sep 17 00:00:00 2001 From: Naoise Holohan <51835109+naoise-h@users.noreply.github.com> Date: Tue, 23 Jan 2024 15:51:57 +0000 Subject: [PATCH] [BUG] Fixes for scikit-learn v1.4 (#91) * Bugfixes for scikit-learn 1.4 * Fixing pycodestyle error * Updating GitHub action tests * Skipping LogReg test for older sklearn versions * Adding Python 3.12 classifier tag * Tidying DecisionTreeClassifier fix --- .github/workflows/code.yml | 2 +- .github/workflows/general.yml | 2 +- .github/workflows/libraries.yml | 27 ++++++++--------------- diffprivlib/models/forest.py | 10 ++++++++- diffprivlib/models/logistic_regression.py | 10 ++++++--- setup.py | 1 + tests/models/test_LogisticRegression.py | 6 +++-- 7 files changed, 32 insertions(+), 26 deletions(-) diff --git a/.github/workflows/code.yml b/.github/workflows/code.yml index 0d69505..4044d1f 100644 --- a/.github/workflows/code.yml +++ b/.github/workflows/code.yml @@ -17,7 +17,7 @@ jobs: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.11' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/general.yml b/.github/workflows/general.yml index a0eeb9c..6aa48da 100644 --- a/.github/workflows/general.yml +++ b/.github/workflows/general.yml @@ -19,7 +19,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/libraries.yml b/.github/workflows/libraries.yml index c3f5def..81b2ecb 100644 --- a/.github/workflows/libraries.yml +++ b/.github/workflows/libraries.yml @@ -19,44 +19,35 @@ jobs: matrix: include: - - library: numpy - version: 1.21.6 - python-version: '3.10' - - library: numpy - version: 1.22.4 - python-version: '3.10' - library: numpy version: 1.23.5 python-version: '3.10' - library: numpy version: 1.24.4 python-version: '3.11' + - library: numpy + version: 1.25.2 + python-version: '3.11' - - library: scikit-learn - version: 0.24.2 - python-version: 3.9 - - library: scikit-learn - version: 1.0.2 - python-version: '3.10' - library: scikit-learn version: 1.1.3 python-version: '3.10' - library: scikit-learn version: 1.2.2 python-version: '3.10' + - library: scikit-learn + version: 1.3.2 + python-version: '3.11' - - library: scipy - version: 1.7.3 - python-version: '3.10' - - library: scipy - version: 1.8.1 - python-version: '3.10' - library: scipy version: 1.9.3 python-version: '3.11' - library: scipy version: 1.10.1 python-version: '3.11' + - library: scipy + version: 1.11.4 + python-version: '3.11' - library: crlibm python-version: '3.10' diff --git a/diffprivlib/models/forest.py b/diffprivlib/models/forest.py index 042c61c..d3e373b 100644 --- a/diffprivlib/models/forest.py +++ b/diffprivlib/models/forest.py @@ -345,7 +345,7 @@ class DecisionTreeClassifier(skDecisionTreeClassifier, DiffprivlibMixin): skDecisionTreeClassifier, "max_depth", "random_state") def __init__(self, max_depth=5, *, epsilon=1, bounds=None, classes=None, random_state=None, accountant=None, - **unused_args): + criterion=None, **unused_args): # Todo: Remove when scikit-learn v1.0 is a min requirement try: super().__init__( # pylint: disable=unexpected-keyword-arg @@ -379,6 +379,9 @@ def __init__(self, max_depth=5, *, epsilon=1, bounds=None, classes=None, random_ self.classes = classes self.accountant = BudgetAccountant.load_default(accountant) + if criterion is not None: + unused_args['criterion'] = criterion + self._warn_unused_args(unused_args) def fit(self, X, y, sample_weight=None, check_input=True): @@ -448,6 +451,11 @@ def fit(self, X, y, sample_weight=None, check_input=True): return self + def _fit(self, X, y, sample_weight=None, check_input=True, missing_values_in_feature_mask=None): + self.fit(X, y, sample_weight=sample_weight, check_input=check_input) + + return self + @property def n_features_(self): return self.n_features_in_ diff --git a/diffprivlib/models/logistic_regression.py b/diffprivlib/models/logistic_regression.py index 112ea9f..097cde4 100644 --- a/diffprivlib/models/logistic_regression.py +++ b/diffprivlib/models/logistic_regression.py @@ -371,7 +371,7 @@ def _logistic_regression_path(X, y, epsilon, data_norm, pos_class=None, Cs=10, f X = check_array(X, accept_sparse='csr', dtype=np.float64, accept_large_sparse=True) y = check_array(y, ensure_2d=False, dtype=None) check_consistent_length(X, y) - _, n_features = X.shape + n_samples, n_features = X.shape classes = np.unique(y) @@ -400,17 +400,21 @@ def _logistic_regression_path(X, y, epsilon, data_norm, pos_class=None, Cs=10, f if SKL_LOSS_MODULE: func = LinearModelLoss(base_loss=HalfBinomialLoss(), fit_intercept=fit_intercept).loss_gradient + sw_sum = n_samples else: func = _logistic_loss_and_grad + sw_sum = 1 coefs = [] n_iter = np.zeros(len(Cs), dtype=np.int32) for i, C in enumerate(Cs): - vector_mech = Vector(epsilon=epsilon, dimension=n_features + int(fit_intercept), alpha=1. / C, + l2_reg_strength = 1.0 / (C * sw_sum) + vector_mech = Vector(epsilon=epsilon, dimension=n_features + int(fit_intercept), alpha=l2_reg_strength, function_sensitivity=0.25, data_sensitivity=data_norm, random_state=random_state) noisy_logistic_loss = vector_mech.randomise(func) - args = (X, target, sample_weight, 1. / C) if SKL_LOSS_MODULE else (X, target, 1. / C, sample_weight) + args = (X, target, sample_weight, l2_reg_strength) if SKL_LOSS_MODULE else (X, target, l2_reg_strength, + sample_weight) iprint = [-1, 50, 1, 100, 101][np.searchsorted(np.array([0, 1, 2, 3]), verbose)] output_vec, _, info = optimize.fmin_l_bfgs_b(noisy_logistic_loss, output_vec, fprime=None, diff --git a/setup.py b/setup.py index 4d9e78a..34ec167 100644 --- a/setup.py +++ b/setup.py @@ -63,6 +63,7 @@ def get_version(file_path): 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', 'Topic :: Software Development :: Libraries', 'Topic :: Software Development :: Libraries :: Python Modules', 'Topic :: Scientific/Engineering', diff --git a/tests/models/test_LogisticRegression.py b/tests/models/test_LogisticRegression.py index 06d4ba1..696bac5 100644 --- a/tests/models/test_LogisticRegression.py +++ b/tests/models/test_LogisticRegression.py @@ -1,8 +1,9 @@ import numpy as np -from unittest import TestCase +from unittest import TestCase, skipIf from diffprivlib.models.logistic_regression import LogisticRegression from diffprivlib.utils import PrivacyLeakWarning, DiffprivlibCompatibilityWarning, BudgetError +from sklearn import __version__ as sklearn_version class TestLogisticRegression(TestCase): @@ -151,6 +152,7 @@ def test_different_results(self): self.assertTrue(np.any(predict1 != predict2) or np.any(predict1 != predict3)) + @skipIf(sklearn_version < "1.4", "The penalty was scaled incorrectly in previous versions (Scikit-Learn GH 26721)") def test_same_results(self): from sklearn import datasets from sklearn.model_selection import train_test_split @@ -180,7 +182,7 @@ def test_simple(self): X -= 3.0 X /= 2.5 - clf = LogisticRegression(epsilon=2, data_norm=1.0, random_state=0) + clf = LogisticRegression(epsilon=2, data_norm=1.0, random_state=1) clf.fit(X, y) self.assertIsNotNone(clf)