Merge pull request #11 from JuliaAI/dev
For a 0.1.1 release
ablaom authored Jun 4, 2024
2 parents cfd3576 + ec0eb11 commit 5d605c6
Showing 5 changed files with 83 additions and 71 deletions.
6 changes: 3 additions & 3 deletions Project.toml
@@ -1,7 +1,7 @@
name = "FeatureSelection"
uuid = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6"
authors = ["Anthony D. Blaom <anthony.blaom@gmail.com>", "Samuel Okon <okonsamuel50@gmail.com"]
version = "0.1.0"
version = "0.1.1"

[deps]
MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
@@ -12,11 +12,11 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
Aqua = "0.8"
Distributions = "0.25"
julia = "1.6"
MLJBase = "1.1"
MLJBase = "1.4"
MLJTuning = "0.8"
MLJDecisionTreeInterface = "0.4"
MLJScikitLearnInterface = "0.6"
MLJModelInterface = "1.4"
MLJModelInterface = "1.10"
ScientificTypesBase = "3"
StableRNGs = "1"
StatisticalMeasures = "0.1"
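The compat bumps above (MLJBase 1.4, MLJModelInterface 1.10) presumably gate the new `constructor` trait used later in this diff. To try the release once the 0.1.1 tag is registered, a minimal sketch using the standard Pkg API:

```julia
using Pkg

# Install FeatureSelection at the version bumped in this commit.
Pkg.add(name = "FeatureSelection", version = "0.1.1")

# Confirm the resolved version.
Pkg.status("FeatureSelection")
```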
14 changes: 0 additions & 14 deletions src/FeatureSelection.jl
@@ -10,18 +10,4 @@ const MMI = MLJModelInterface
include("models/featureselector.jl")
include("models/rfe.jl")

## Pkg Traits
MMI.metadata_pkg.(
(
DeterministicRecursiveFeatureElimination,
ProbabilisticRecursiveFeatureElimination,
FeatureSelector
),
package_name = "FeatureSelection",
package_uuid = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6",
package_url = "https://github.com/JuliaAI/FeatureSelection.jl",
is_pure_julia = true,
package_license = "MIT"
)

end # module
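This hunk deletes the broadcast `metadata_pkg.` call from the module file; the same metadata is re-registered next to each model in the files below. A quick check that package-level traits still resolve after the move (a sketch, assuming the standard MLJModelInterface trait accessors):

```julia
using FeatureSelection
import MLJModelInterface as MMI

# Package-level traits, now declared beside each model definition.
MMI.package_name(FeatureSelector)     # "FeatureSelection"
MMI.package_license(FeatureSelector)  # "MIT"
MMI.is_pure_julia(FeatureSelector)    # true
```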
15 changes: 13 additions & 2 deletions src/models/featureselector.jl
@@ -84,9 +84,20 @@ MMI.metadata_model(
FeatureSelector,
input_scitype = Table,
output_scitype = Table,
load_path = "FeatureSelction.FeatureSelector"
load_path = "FeatureSelection.FeatureSelector"
)

## Pkg Traits
MMI.metadata_pkg(
FeatureSelector,
package_name = "FeatureSelection",
package_uuid = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6",
package_url = "https://github.com/JuliaAI/FeatureSelection.jl",
is_pure_julia = true,
package_license = "MIT"
)

## Docstring
"""
$(MMI.doc_header(FeatureSelector))
@@ -164,4 +175,4 @@ julia> transform(fit!(machine(selector, X)), X)
```
"""
FeatureSelector
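The `load_path` fix above matters for programmatic loading, which resolves models by this string. Basic usage, in the spirit of the docstring example referenced in the hunk (a sketch, with a made-up table `X`):

```julia
using MLJBase, FeatureSelection

X = (x1 = rand(5), x2 = rand(5), x3 = rand(5))  # any Tables.jl table

# Keep only the named columns.
selector = FeatureSelector(features = [:x1, :x3])
mach = fit!(machine(selector, X))
transform(mach, X)  # table with columns :x1 and :x3 only
```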
108 changes: 60 additions & 48 deletions src/models/rfe.jl
@@ -1,7 +1,7 @@
function warn_double_spec(arg, model)
return "Using `model=$arg`. Ignoring keyword specification `model=$model`. "
end

const ERR_SPECIFY_MODEL = ArgumentError(
"You need to specify model as positional argument or specify `model=...`."
)
@@ -36,66 +36,67 @@ for (ModelType, ModelSuperType) in MODELTYPE_GIVEN_SUPERTYPES
eval(ex)
end

eval(:(const RFE{M} = Union{$((Expr(:curly, modeltype, :M) for modeltype in MODEL_TYPES)...)}))
eval(:(const RFE{M} =
Union{$((Expr(:curly, modeltype, :M) for modeltype in MODEL_TYPES)...)}))

# Common keyword constructor for both model types
"""
RecursiveFeatureElimination(model, n_features, step)
This model implements a recursive feature elimination algorithm for feature selection.
It recursively removes features, training a base model on the remaining features and
evaluating their importance until the desired number of features is selected.
Construct an instance with default hyper-parameters using the syntax
`model = RecursiveFeatureElimination(model=...)`. Provide keyword arguments to override
hyper-parameter defaults.
Construct an instance with default hyper-parameters using the syntax
`rfe_model = RecursiveFeatureElimination(model=...)`. Provide keyword arguments to override
hyper-parameter defaults.
# Training data
In MLJ or MLJBase, bind an instance `model` to data with
In MLJ or MLJBase, bind an instance `rfe_model` to data with
mach = machine(model, X, y)
mach = machine(rfe_model, X, y)
OR, if the base model supports weights, as
mach = machine(model, X, y, w)
mach = machine(rfe_model, X, y, w)
Here:
- `X` is any table of input features (eg, a `DataFrame`) whose columns are of the scitype
required by the base model; check column scitypes with `schema(X)` and column
scitypes required by base model with `input_scitype(basemodel)`.
- `y` is the target, which can be any table of responses whose element scitype is
`Continuous` or `Finite` depending on the `target_scitype` required by the base model;
check the scitype with `scitype(y)`.
- `w` is the observation weights, which can either be `nothing` (default) or an
`AbstractVector` whose element scitype is `Count` or `Continuous`. This is different
from the `weights` hyper-parameter of the model, see below.
Train the machine using `fit!(mach, rows=...)`.
# Hyper-parameters
- model: A base model with a `fit` method that provides information on feature
importance (i.e. `reports_feature_importances(model) == true`)
- n_features::Real = 0: The number of features to select. If `0`, half of the
features are selected. If a positive integer, the parameter is the absolute number
of features to select. If a real number between 0 and 1, it is the fraction of features
to select.
- step::Real=1: If the value of step is at least 1, it signifies the quantity of features to
eliminate in each iteration. Conversely, if step falls strictly within the range of
0.0 to 1.0, it denotes the proportion (rounded down) of features to remove during each iteration.
# Operations
- `transform(mach, X)`: transform the input table `X` into a new table containing only
columns corresponding to features selected by the RFE algorithm.
- `predict(mach, X)`: transform the input table `X` into a new table, as in
`transform(mach, X)` above, and predict using the fitted base model on the
transformed table.
# Fitted parameters
@@ -106,11 +107,11 @@ The fields of `fitted_params(mach)` are:
# Report
The fields of `report(mach)` are:
- `ranking`: The feature ranking of each feature in the training dataset.
- `model_report`: report for the fitted base model.
- `features`: names of features seen during the training process.
# Examples
```
@@ -131,10 +132,10 @@ selector = RecursiveFeatureElimination(model = rf)
mach = machine(selector, X, y)
fit!(mach)
# view the feature importances
feature_importances(mach)
# predict using the base model
Xnew = MLJ.table(rand(rng, 50, 10));
predict(mach, Xnew)
@@ -160,7 +161,7 @@ function RecursiveFeatureElimination(
#TODO: Check that the specified model implements the predict method.
# probably add a trait to check this
MMI.reports_feature_importances(model) || throw(ERR_FEATURE_IMPORTANCE_SUPPORT)
if model isa Deterministic
selector = DeterministicRecursiveFeatureElimination{typeof(model)}(
model, Float64(n_features), Float64(step)
)
@@ -170,7 +171,7 @@ )
)
else
throw(ERR_MODEL_TYPE)
end
message = MMI.clean!(selector)
isempty(message) || @warn(message)
return selector
@@ -204,21 +205,21 @@ function MMI.fit(selector::RFE, verbosity::Int, X, y, args...)
n_features_select = selector.n_features
## zero indicates that half of the features be selected.
if n_features_select == 0
n_features_select = div(nfeatures, 2)
elseif 0 < n_features_select < 1
n_features_select = round(Int, n_features_select * nfeatures)
else
n_features_select = round(Int, n_features_select)
end

step = selector.step

if 0 < step < 1
step = round(Int, max(1, step * n_features_select))
else
step = round(Int, step)
end

support = trues(nfeatures)
ranking = ones(Int, nfeatures) # every feature has equal rank initially
mask = trues(nfeatures) # for boolean indexing of ranking vector in while loop below
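The normalisation of `n_features` and `step` in the hunk above is easy to sanity-check in isolation. A standalone sketch of the same arithmetic (hypothetical helper, not part of the package):

```julia
# Mirrors the branches in `MMI.fit` above; illustration only.
function resolve_sizes(nfeatures, n_features, step)
    n = n_features == 0 ? div(nfeatures, 2) :                      # 0 => keep half
        0 < n_features < 1 ? round(Int, n_features * nfeatures) :  # fraction of features
        round(Int, n_features)                                     # absolute count
    s = 0 < step < 1 ? round(Int, max(1, step * n)) : round(Int, step)
    return n, s
end

resolve_sizes(10, 0, 1)      # (5, 1): zero means select half the features
resolve_sizes(10, 0.3, 0.5)  # (3, 2): fractions scale with feature counts
```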
@@ -230,7 +231,7 @@ function MMI.fit(selector::RFE, verbosity::Int, X, y, args...)
# Rank the remaining features
model = selector.model
verbosity > 0 && @info("Fitting estimator with $(n_features_left) features.")

data = MMI.reformat(model, MMI.selectcols(X, features_left), args...)

fitresult, _, report = MMI.fit(model, verbosity - 1, data...)
@@ -263,14 +264,14 @@ function MMI.fit(selector::RFE, verbosity::Int, X, y, args...)
data = MMI.reformat(selector.model, MMI.selectcols(X, features_left), args...)
verbosity > 0 && @info ("Fitting estimator with $(n_features_left) features.")
model_fitresult, _, model_report = MMI.fit(selector.model, verbosity - 1, data...)

fitresult = (
support = support,
model_fitresult = model_fitresult,
features_left = features_left,
features = features
)
report = (
ranking = ranking,
model_report = model_report
)
@@ -294,7 +295,7 @@ end

function MMI.transform(::RFE, fitresult, X)
sch = Tables.schema(Tables.columns(X))
if (length(fitresult.features) == length(sch.names) &&
!all(e -> e in sch.names, fitresult.features))
throw(
ERR_FEATURES_SEEN
@@ -312,7 +313,7 @@ function MMI.save(model::RFE, fitresult)
atomic_fitresult = fitresult.model_fitresult
features_left = fitresult.features_left
features = fitresult.features

atom = model.model
return (
support = copy(support),
@@ -337,14 +338,12 @@ function MMI.restore(model::RFE, serializable_fitresult)
)
end

## Traits definitions
function MMI.load_path(::Type{<:DeterministicRecursiveFeatureElimination})
return "FeatureSelection.DeterministicRecursiveFeatureElimination"
end
## Trait definitions

function MMI.load_path(::Type{<:ProbabilisticRecursiveFeatureElimination})
return "FeatureSelection.ProbabilisticRecursiveFeatureElimination"
end
# load path points to constructor not type:
MMI.load_path(::Type{<:RFE}) = "FeatureSelection.RecursiveFeatureElimination"
MMI.constructor(::Type{<:RFE}) = RecursiveFeatureElimination
MMI.package_name(::Type{<:RFE}) = "FeatureSelection"

for trait in [
:supports_weights,
@@ -387,4 +386,17 @@ end
## TRAINING LOSSES SUPPORT
function MMI.training_losses(model::RFE, rfe_report)
return MMI.training_losses(model.model, rfe_report.model_report)
end

## Pkg Traits
MMI.metadata_pkg.(
(
DeterministicRecursiveFeatureElimination,
ProbabilisticRecursiveFeatureElimination,
),
package_name = "FeatureSelection",
package_uuid = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6",
package_url = "https://github.com/JuliaAI/FeatureSelection.jl",
is_pure_julia = true,
package_license = "MIT"
)
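With `load_path` now pointing at the constructor, both the deterministic and probabilistic variants round-trip through one public name. A sketch matching the tests below (assuming MLJDecisionTreeInterface is installed):

```julia
using MLJBase, FeatureSelection
import MLJDecisionTreeInterface

rf = MLJDecisionTreeInterface.RandomForestRegressor()
selector = RecursiveFeatureElimination(model = rf)

typeof(selector)               # DeterministicRecursiveFeatureElimination{...}
MLJBase.constructor(selector)  # RecursiveFeatureElimination
MLJBase.load_path(selector)    # "FeatureSelection.RecursiveFeatureElimination"
```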
11 changes: 7 additions & 4 deletions test/models/rfe.jl
@@ -17,12 +17,15 @@ const DTM = DummyTestModels
@test_throws FeatureSelection.ERR_SPECIFY_MODEL RecursiveFeatureElimination()
reg = DTM.DeterministicConstantRegressor()
@test_throws(
FeatureSelection.ERR_FEATURE_IMPORTANCE_SUPPORT,
RecursiveFeatureElimination(model = DTM.DeterministicConstantRegressor())
)
rf = MLJDecisionTreeInterface.RandomForestRegressor(rng = rng)
selector = RecursiveFeatureElimination(model = rf)
@test selector isa FeatureSelection.DeterministicRecursiveFeatureElimination
@test MLJBase.constructor(selector) == RecursiveFeatureElimination
@test MLJBase.package_name(selector) == "FeatureSelection"
@test MLJBase.load_path(selector) == "FeatureSelection.RecursiveFeatureElimination"

# Fit
selector_mach = machine(selector, Xt, y)
@@ -34,7 +37,7 @@ const DTM = DummyTestModels
selector_mach.model.model, selector_mach.fitresult.model_fitresult
)
@test feature_importances(selector_mach) == [
:x1 => 6.0, :x2 => 5.0, :x3 => 4.0, :x4 => 3.0, :x5 => 2.0,
:x6 => 1.0, :x7 => 1.0, :x8 => 1.0, :x9 => 1.0, :x10 => 1.0
]
rpt = report(selector_mach)
@@ -94,7 +97,7 @@ end
measure = rms,
tuning = Grid(rng=rng),
resampling = StratifiedCV(nfolds = 5),
range = range(rfecv, :n_features, values = 1:10)
)
self_tuning_rfe_mach = machine(tuning_rfe_model, Xs, ys)
fit!(self_tuning_rfe_mach)
@@ -127,4 +130,4 @@ end
mach2 = MLJBase.machine(io)
close(io)
@test MLJBase.predict(mach2, (; x1=rand(2), x2 = rand(2))) == yhat
end
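The final testset round-trips a fitted machine through an in-memory buffer. The same pattern in isolation (a sketch; `mach` and `Xnew` stand for a fitted RFE machine and new data, as in the tests):

```julia
using MLJBase

io = IOBuffer()
MLJBase.save(io, mach)  # serialize the fitted machine
seekstart(io)
mach2 = machine(io)     # restore without retraining
close(io)

predict(mach2, Xnew)
```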
