Skip to content

Commit

Permalink
[BUG] Fixes for scikit-learn v1.4 (#91)
Browse files (browse the repository at this point in the history)
* Bugfixes for scikit-learn 1.4

* Fixing pycodestyle error

* Updating GitHub action tests

* Skipping LogReg test for older sklearn versions

* Adding Python 3.12 classifier tag

* Tidying DecisionTreeClassifier fix
  • Loading branch information
naoise-h authored Jan 23, 2024
1 parent 0e6cea9 commit 9dde5b9
Show file tree
Hide file tree
Showing 7 changed files with 32 additions and 26 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/code.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: '3.10'
python-version: '3.11'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/general.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ['3.8', '3.9', '3.10', '3.11']
python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']

steps:
- uses: actions/checkout@v3
Expand Down
27 changes: 9 additions & 18 deletions .github/workflows/libraries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,44 +19,35 @@ jobs:

matrix:
include:
- library: numpy
version: 1.21.6
python-version: '3.10'
- library: numpy
version: 1.22.4
python-version: '3.10'
- library: numpy
version: 1.23.5
python-version: '3.10'
- library: numpy
version: 1.24.4
python-version: '3.11'
- library: numpy
version: 1.25.2
python-version: '3.11'

- library: scikit-learn
version: 0.24.2
python-version: 3.9
- library: scikit-learn
version: 1.0.2
python-version: '3.10'
- library: scikit-learn
version: 1.1.3
python-version: '3.10'
- library: scikit-learn
version: 1.2.2
python-version: '3.10'
- library: scikit-learn
version: 1.3.2
python-version: '3.11'

- library: scipy
version: 1.7.3
python-version: '3.10'
- library: scipy
version: 1.8.1
python-version: '3.10'
- library: scipy
version: 1.9.3
python-version: '3.11'
- library: scipy
version: 1.10.1
python-version: '3.11'
- library: scipy
version: 1.11.4
python-version: '3.11'

- library: crlibm
python-version: '3.10'
Expand Down
10 changes: 9 additions & 1 deletion diffprivlib/models/forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ class DecisionTreeClassifier(skDecisionTreeClassifier, DiffprivlibMixin):
skDecisionTreeClassifier, "max_depth", "random_state")

def __init__(self, max_depth=5, *, epsilon=1, bounds=None, classes=None, random_state=None, accountant=None,
**unused_args):
criterion=None, **unused_args):
# TODO: Remove once scikit-learn v1.0 is the minimum requirement
try:
super().__init__( # pylint: disable=unexpected-keyword-arg
Expand Down Expand Up @@ -379,6 +379,9 @@ def __init__(self, max_depth=5, *, epsilon=1, bounds=None, classes=None, random_
self.classes = classes
self.accountant = BudgetAccountant.load_default(accountant)

if criterion is not None:
unused_args['criterion'] = criterion

self._warn_unused_args(unused_args)

def fit(self, X, y, sample_weight=None, check_input=True):
Expand Down Expand Up @@ -448,6 +451,11 @@ def fit(self, X, y, sample_weight=None, check_input=True):

return self

def _fit(self, X, y, sample_weight=None, check_input=True, missing_values_in_feature_mask=None):
    """Private fitting hook that hands all the work to the public ``fit``.

    Parameters
    ----------
    X, y
        Training data and targets, passed to ``fit`` unchanged.
    sample_weight : optional
        Forwarded to ``fit`` as the ``sample_weight`` keyword.
    check_input : bool, default=True
        Forwarded to ``fit`` as the ``check_input`` keyword.
    missing_values_in_feature_mask : optional
        Accepted but never forwarded to ``fit``.
        NOTE(review): presumably present only so the signature matches the private hook
        that scikit-learn 1.4 calls on tree estimators -- confirm against the parent class.

    Returns
    -------
    self
        Always this estimator, whatever ``fit`` itself returns.
    """
    # Only these two options are understood by ``fit``; the mask is deliberately dropped.
    forwarded_options = {"sample_weight": sample_weight, "check_input": check_input}
    self.fit(X, y, **forwarded_options)
    return self

@property
def n_features_(self):
return self.n_features_in_
Expand Down
10 changes: 7 additions & 3 deletions diffprivlib/models/logistic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ def _logistic_regression_path(X, y, epsilon, data_norm, pos_class=None, Cs=10, f
X = check_array(X, accept_sparse='csr', dtype=np.float64, accept_large_sparse=True)
y = check_array(y, ensure_2d=False, dtype=None)
check_consistent_length(X, y)
_, n_features = X.shape
n_samples, n_features = X.shape

classes = np.unique(y)

Expand Down Expand Up @@ -400,17 +400,21 @@ def _logistic_regression_path(X, y, epsilon, data_norm, pos_class=None, Cs=10, f

if SKL_LOSS_MODULE:
func = LinearModelLoss(base_loss=HalfBinomialLoss(), fit_intercept=fit_intercept).loss_gradient
sw_sum = n_samples
else:
func = _logistic_loss_and_grad
sw_sum = 1

coefs = []
n_iter = np.zeros(len(Cs), dtype=np.int32)
for i, C in enumerate(Cs):
vector_mech = Vector(epsilon=epsilon, dimension=n_features + int(fit_intercept), alpha=1. / C,
l2_reg_strength = 1.0 / (C * sw_sum)
vector_mech = Vector(epsilon=epsilon, dimension=n_features + int(fit_intercept), alpha=l2_reg_strength,
function_sensitivity=0.25, data_sensitivity=data_norm, random_state=random_state)
noisy_logistic_loss = vector_mech.randomise(func)

args = (X, target, sample_weight, 1. / C) if SKL_LOSS_MODULE else (X, target, 1. / C, sample_weight)
args = (X, target, sample_weight, l2_reg_strength) if SKL_LOSS_MODULE else (X, target, l2_reg_strength,
sample_weight)

iprint = [-1, 50, 1, 100, 101][np.searchsorted(np.array([0, 1, 2, 3]), verbose)]
output_vec, _, info = optimize.fmin_l_bfgs_b(noisy_logistic_loss, output_vec, fprime=None,
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def get_version(file_path):
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
'Topic :: Software Development :: Libraries',
'Topic :: Software Development :: Libraries :: Python Modules',
'Topic :: Scientific/Engineering',
Expand Down
6 changes: 4 additions & 2 deletions tests/models/test_LogisticRegression.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import numpy as np
from unittest import TestCase
from unittest import TestCase, skipIf

from diffprivlib.models.logistic_regression import LogisticRegression
from diffprivlib.utils import PrivacyLeakWarning, DiffprivlibCompatibilityWarning, BudgetError
from sklearn import __version__ as sklearn_version


class TestLogisticRegression(TestCase):
Expand Down Expand Up @@ -151,6 +152,7 @@ def test_different_results(self):

self.assertTrue(np.any(predict1 != predict2) or np.any(predict1 != predict3))

@skipIf(sklearn_version < "1.4", "The penalty was scaled incorrectly in previous versions (Scikit-Learn GH 26721)")
def test_same_results(self):
from sklearn import datasets
from sklearn.model_selection import train_test_split
Expand Down Expand Up @@ -180,7 +182,7 @@ def test_simple(self):
X -= 3.0
X /= 2.5

clf = LogisticRegression(epsilon=2, data_norm=1.0, random_state=0)
clf = LogisticRegression(epsilon=2, data_norm=1.0, random_state=1)
clf.fit(X, y)

self.assertIsNotNone(clf)
Expand Down

0 comments on commit 9dde5b9

Please sign in to comment.