From 3dd647509de8414b985c56f47356b431d5fee3f7 Mon Sep 17 00:00:00 2001
From: Jan-Hendrik Menke
Date: Tue, 14 Jan 2020 15:49:05 +0100
Subject: [PATCH 1/5] automatic detection of multioutput datasets

---
 tpot/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tpot/base.py b/tpot/base.py
index 5788c90c..06701854 100644
--- a/tpot/base.py
+++ b/tpot/base.py
@@ -1158,7 +1158,7 @@ def _check_dataset(self, features, target, sample_weight=None):
 
         try:
             if target is not None:
-                X, y = check_X_y(features, target, accept_sparse=True, dtype=None)
+                X, y = check_X_y(features, target, accept_sparse=True, dtype=None, multi_output=len(target.shape) > 1 and target.shape[1] > 1)
                 if self._imputed:
                     return X, y
                 else:

From 81c00790c78a0568a0f1c79e3f345a85b9370e5d Mon Sep 17 00:00:00 2001
From: Jan-Hendrik Menke
Date: Mon, 20 Jan 2020 12:54:01 +0100
Subject: [PATCH 2/5] bugfix in operator_utils

---
 tpot/operator_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tpot/operator_utils.py b/tpot/operator_utils.py
index dd297214..1b60aa61 100644
--- a/tpot/operator_utils.py
+++ b/tpot/operator_utils.py
@@ -212,7 +212,7 @@ def op_type(cls):
             for dkey, dval in prange.items():
                 dep_import_str, dep_op_str, dep_op_obj = source_decode(dkey, verbose=verbose)
                 if dep_import_str in import_hash:
-                    import_hash[import_str].append(dep_op_str)
+                    import_hash[dep_import_str].append(dep_op_str)
                 else:
                     import_hash[dep_import_str] = [dep_op_str]
                 dep_op_list[pname] = dep_op_str

From ae72c923dfbf8cce92ca154863df2a1e254258d4 Mon Sep 17 00:00:00 2001
From: Jan-Hendrik Menke
Date: Mon, 20 Jan 2020 13:16:36 +0100
Subject: [PATCH 3/5] enable automatic multioutput for default regressors/classifiers

---
 tpot/base.py | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/tpot/base.py b/tpot/base.py
index 06701854..06f91497 100644
--- a/tpot/base.py
+++ b/tpot/base.py
@@ -491,8 +491,7 @@ def _setup_toolbox(self):
         self._toolbox.register('expr_mut', self._gen_grow_safe, min_=self._min, max_=self._max)
         self._toolbox.register('mutate', self._random_mutation_operator)
 
-
-    def _fit_init(self):
+    def _fit_init(self, multi_output_target: bool = False):
         # initialization for fit function
         if not self.warm_start or not hasattr(self, '_pareto_front'):
             self._pop = []
@@ -501,6 +500,35 @@ def _fit_init(self):
         self._last_optimized_pareto_front_n_gens = 0
 
         self._setup_config(self.config_dict)
+        if multi_output_target:
+            single_output_classifiers = [
+                'sklearn.naive_bayes.MultinomialNB',
+                'sklearn.svm.LinearSVC',
+                'xgboost.XGBClassifier'
+            ]
+            single_output_regressors = [
+                'sklearn.ensemble.AdaBoostRegressor',
+                'sklearn.linear_model.LassoLarsCV',
+                'sklearn.linear_model.ElasticNetCV',
+                'sklearn.svm.LinearSVR',
+                'xgboost.XGBRegressor',
+                'sklearn.linear_model.SGDRegressor'
+            ]
+            for model in list(self._config_dict.keys()):
+                if model in single_output_classifiers:
+                    if 'sklearn.multioutput.MultiOutputClassifier' not in self._config_dict.keys():
+                        self._config_dict['sklearn.multioutput.MultiOutputClassifier'] = {"estimator": {}}
+                    self._config_dict['sklearn.multioutput.MultiOutputClassifier']['estimator'][model] = self._config_dict[model]
+                    self._config_dict.pop(model, None)
+                elif model in single_output_regressors:
+                    if 'sklearn.multioutput.MultiOutputRegressor' not in self._config_dict.keys():
+                        self._config_dict['sklearn.multioutput.MultiOutputRegressor'] = {"estimator": {}}
+                    if model == 'sklearn.linear_model.ElasticNetCV':
+                        self._config_dict['sklearn.linear_model.MultiTaskElasticNetCV'] = self._config_dict[model]
+                    else:
+                        self._config_dict['sklearn.multioutput.MultiOutputRegressor']['estimator'][model] = self._config_dict[model]
+                    self._config_dict.pop(model, None)
+
         self._setup_template(self.template)
 
         self.operators = []
@@ -622,7 +650,7 @@ def fit(self, features, target, sample_weight=None, groups=None):
 
             Returns a copy of the fitted TPOT object
 
         """
-        self._fit_init()
+        self._fit_init(multi_output_target=len(target.shape) > 1 and target.shape[1] > 1)
 
         features, target = self._check_dataset(features, target, sample_weight)

From 124ec4b386a581ddcf0b811bfb0c909d94470d54 Mon Sep 17 00:00:00 2001
From: Weixuan Fu
Date: Wed, 13 May 2020 08:48:38 -0400
Subject: [PATCH 4/5] Update operator_utils.py

Fix a bug in the PR
---
 tpot/operator_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tpot/operator_utils.py b/tpot/operator_utils.py
index e66290c6..0d1f93f7 100644
--- a/tpot/operator_utils.py
+++ b/tpot/operator_utils.py
@@ -213,7 +213,7 @@ def op_type(cls):
             for dkey, dval in prange.items():
                 dep_import_str, dep_op_str, dep_op_obj = source_decode(dkey, verbose=verbose)
                 if dep_import_str in import_hash:
-                    import_hash[dep_import_str].append(dep_op_str)
+                    import_hash[import_str].append(dep_op_str)
                 else:
                     import_hash[dep_import_str] = [dep_op_str]
                 dep_op_list[pname] = dep_op_str

From a86fdf9858145148f7470e527d43858ff2ca7111 Mon Sep 17 00:00:00 2001
From: Weixuan Fu
Date: Wed, 13 May 2020 09:02:11 -0400
Subject: [PATCH 5/5] Update operator_utils.py

Sorry, my mistake! I changed it back.
---
 tpot/operator_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tpot/operator_utils.py b/tpot/operator_utils.py
index 0d1f93f7..e66290c6 100644
--- a/tpot/operator_utils.py
+++ b/tpot/operator_utils.py
@@ -213,7 +213,7 @@ def op_type(cls):
             for dkey, dval in prange.items():
                 dep_import_str, dep_op_str, dep_op_obj = source_decode(dkey, verbose=verbose)
                 if dep_import_str in import_hash:
-                    import_hash[import_str].append(dep_op_str)
+                    import_hash[dep_import_str].append(dep_op_str)
                 else:
                     import_hash[dep_import_str] = [dep_op_str]
                 dep_op_list[pname] = dep_op_str
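
Usage sketch (not part of the patches above): a minimal example of how the multioutput detection added in PATCH 1/5 and PATCH 3/5 would be exercised, assuming TPOT is installed with this patch series applied. The dataset sizes and TPOTRegressor parameters below are illustrative only, and whether the pipelines TPOT evolves handle a 2-D target end to end still depends on the operators it selects; the snippet only shows how the detection is triggered.

    from sklearn.datasets import make_regression
    from tpot import TPOTRegressor

    # y has shape (n_samples, 3), so fit() calls _fit_init(multi_output_target=True),
    # which rewraps single-output-only regressors (e.g. sklearn.svm.LinearSVR) in
    # sklearn.multioutput.MultiOutputRegressor and swaps ElasticNetCV for
    # MultiTaskElasticNetCV; _check_dataset() then passes multi_output=True to check_X_y.
    X, y = make_regression(n_samples=200, n_features=10, n_targets=3, random_state=0)

    tpot = TPOTRegressor(generations=2, population_size=10, random_state=0, verbosity=2)
    tpot.fit(X, y)
    print(tpot.score(X, y))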