Enable Discrete Outcomes w/ RScorer #927

Open · wants to merge 3 commits into main
6 changes: 6 additions & 0 deletions econml/score/rscorer.py
@@ -51,6 +51,9 @@ class RScorer:
    discrete_treatment: bool, default ``False``
        Whether the treatment values should be treated as categorical, rather than continuous, quantities

+   discrete_outcome: bool, default ``False``
+       Whether the outcome should be treated as binary
+
    categories: 'auto' or list, default 'auto'
        The categories to use when encoding discrete treatments (or 'auto' to use the unique sorted values).
        The first category will be treated as the control treatment.
@@ -104,6 +107,7 @@ def __init__(self, *,
                 model_y,
                 model_t,
                 discrete_treatment=False,
+                discrete_outcome=False,
                 categories='auto',
                 cv=2,
                 mc_iters=None,
@@ -112,6 +116,7 @@
        self.model_y = clone(model_y, safe=False)
        self.model_t = clone(model_t, safe=False)
        self.discrete_treatment = discrete_treatment
+       self.discrete_outcome = discrete_outcome
        self.cv = cv
        self.categories = categories
        self.random_state = random_state
@@ -150,6 +155,7 @@ def fit(self, y, T, X=None, W=None, sample_weight=None, groups=None):
                         model_t=self.model_t,
                         cv=self.cv,
                         discrete_treatment=self.discrete_treatment,
+                        discrete_outcome=self.discrete_outcome,
                         categories=self.categories,
                         random_state=self.random_state,
                         mc_iters=self.mc_iters,
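
For context, a minimal sketch of how the new flag fits together end to end. The toy data, sample size, and the LogisticRegression choices below are illustrative assumptions, not part of this PR:

import numpy as np
from sklearn.linear_model import LogisticRegression
from econml.dml import LinearDML
from econml.score import RScorer

# Assumed toy setup: binary treatment, binary outcome
n = 5000
X = np.random.normal(size=(n, 2))
T = np.random.binomial(1, 0.5, size=(n,))
y = np.random.binomial(1, 1 / (1 + np.exp(-X[:, 0] * T)))

est = LinearDML(model_y=LogisticRegression(), model_t=LogisticRegression(),
                discrete_treatment=True, discrete_outcome=True, cv=3)
est.fit(y, T, X=X)

# With this PR, RScorer accepts the same flag and forwards it to the internal
# DML model used to residualize y, so model_y can be a classifier
scorer = RScorer(model_y=LogisticRegression(), model_t=LogisticRegression(),
                 discrete_treatment=True, discrete_outcome=True, cv=3)
scorer.fit(y, T, X=X)
print(scorer.score(est))  # higher R-score indicates a better CATE fit
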
135 changes: 96 additions & 39 deletions econml/tests/test_rscorer.py
@@ -20,52 +20,109 @@ def _fit_model(name, model, Y, T, X):

class TestRScorer(unittest.TestCase):

-   def _get_data(self):
+   def _get_data(self, discrete_outcome=False):
        X = np.random.normal(0, 1, size=(100000, 2))
        T = np.random.binomial(1, .5, size=(100000,))
-       y = X[:, 0] * T + np.random.normal(size=(100000,))
-       return y, T, X, X[:, 0]
+       if discrete_outcome:
+           eps = np.random.normal(size=(100000,))
+           log_odds = X[:, 0] * T + eps
+           y_sigmoid = 1 / (1 + np.exp(-log_odds))
+           y = np.array([np.random.binomial(1, p) for p in y_sigmoid])
+           # Difference in conditional probabilities P(y=1|X,T=1) - P(y=1|X,T=0)
+           true_eff = (1 / (1 + np.exp(-(X[:, 0] + eps)))) - (1 / (1 + np.exp(-eps)))
+       else:
+           y = X[:, 0] * T + np.random.normal(size=(100000,))
+           true_eff = X[:, 0]
+       return y, T, X, true_eff
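
A side note on the data generation above: np.random.binomial broadcasts over an array of probabilities, so the per-element list comprehension could be replaced by a single vectorized draw. A minimal equivalent sketch (the uniform array is just a stand-in for y_sigmoid):

import numpy as np

y_sigmoid = np.random.uniform(0.05, 0.95, size=(100000,))  # stand-in for the sigmoid probabilities
y = np.random.binomial(1, y_sigmoid)  # one Bernoulli draw per element, no Python loop
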

    def test_comparison(self):

        def reg():
            return LinearRegression()

        def clf():
            return LogisticRegression()

-       y, T, X, true_eff = self._get_data()
-       (X_train, X_val, T_train, T_val,
-        Y_train, Y_val, _, true_eff_val) = train_test_split(X, T, y, true_eff, test_size=.4)
-
-       models = [('ldml', LinearDML(model_y=reg(), model_t=clf(), discrete_treatment=True, cv=3)),
-                 ('sldml', SparseLinearDML(model_y=reg(), model_t=clf(), discrete_treatment=True,
-                                           featurizer=PolynomialFeatures(degree=2, include_bias=False), cv=3)),
-                 ('xlearner', XLearner(models=reg(), cate_models=reg(), propensity_model=clf())),
-                 ('dalearner', DomainAdaptationLearner(models=reg(), final_models=reg(), propensity_model=clf())),
-                 ('slearner', SLearner(overall_model=reg())),
-                 ('tlearner', TLearner(models=reg())),
-                 ('drlearner', DRLearner(model_propensity=clf(), model_regression=reg(),
-                                         model_final=reg(), cv=3)),
-                 ('rlearner', NonParamDML(model_y=reg(), model_t=clf(), model_final=reg(),
-                                          discrete_treatment=True, cv=3)),
-                 ('dml3dlasso', DML(model_y=reg(), model_t=clf(), model_final=reg(), discrete_treatment=True,
-                                    featurizer=PolynomialFeatures(degree=3), cv=3))
-                 ]
-
-       models = Parallel(n_jobs=1, verbose=1)(delayed(_fit_model)(name, mdl,
-                                                                  Y_train, T_train, X_train)
-                                              for name, mdl in models)
-
-       scorer = RScorer(model_y=reg(), model_t=clf(),
-                        discrete_treatment=True, cv=3, mc_iters=2, mc_agg='median')
-       scorer.fit(Y_val, T_val, X=X_val)
-       rscore = [scorer.score(mdl) for _, mdl in models]
-       rootpehe_score = [np.sqrt(np.mean((true_eff_val.flatten() - mdl.effect(X_val).flatten())**2))
-                         for _, mdl in models]
-       assert LinearRegression().fit(np.array(rscore).reshape(-1, 1), np.array(rootpehe_score)).coef_ < 0.5
-       mdl, _ = scorer.best_model([mdl for _, mdl in models])
-       rootpehe_best = np.sqrt(np.mean((true_eff_val.flatten() - mdl.effect(X_val).flatten())**2))
-       assert rootpehe_best < 1.5 * np.min(rootpehe_score) + 0.05
-       mdl, _ = scorer.ensemble([mdl for _, mdl in models])
-       rootpehe_ensemble = np.sqrt(np.mean((true_eff_val.flatten() - mdl.effect(X_val).flatten())**2))
-       assert rootpehe_ensemble < 1.5 * np.min(rootpehe_score) + 0.05
+       test_cases = [
+           {"name": "continuous_outcome", "discrete_outcome": False},
+           {"name": "discrete_outcome", "discrete_outcome": True}
+       ]
+
+       for case in test_cases:
+           with self.subTest(case["name"]):
+               discrete_outcome = case["discrete_outcome"]
+
+               if discrete_outcome:
+                   y, T, X, true_eff = self._get_data(discrete_outcome=True)
+
+                   models = [('ldml', LinearDML(model_y=clf(), model_t=clf(), discrete_treatment=True,
+                                                discrete_outcome=discrete_outcome, cv=3)),
+                             ('sldml', SparseLinearDML(model_y=clf(), model_t=clf(), discrete_treatment=True,
+                                                       discrete_outcome=discrete_outcome,
+                                                       featurizer=PolynomialFeatures(degree=2, include_bias=False),
+                                                       cv=3)),
+                             ('drlearner', DRLearner(model_propensity=clf(), model_regression=clf(),
+                                                     model_final=reg(), discrete_outcome=discrete_outcome, cv=3)),
+                             ('rlearner', NonParamDML(model_y=clf(), model_t=clf(), model_final=reg(),
+                                                      discrete_treatment=True, discrete_outcome=discrete_outcome,
+                                                      cv=3)),
+                             ('dml3dlasso', DML(model_y=clf(), model_t=clf(), model_final=reg(),
+                                                discrete_treatment=True, discrete_outcome=discrete_outcome,
+                                                featurizer=PolynomialFeatures(degree=3), cv=3)),
+                             # SLearner serves as a baseline for the root-PEHE score; the models above
+                             # do not produce enough variation in the R-score on their own
+                             ('slearner', SLearner(overall_model=reg())),
+                             ]
+
+               else:
+                   y, T, X, true_eff = self._get_data()
+
+                   models = [('ldml', LinearDML(model_y=reg(), model_t=clf(), discrete_treatment=True, cv=3)),
+                             ('sldml', SparseLinearDML(model_y=reg(), model_t=clf(), discrete_treatment=True,
+                                                       featurizer=PolynomialFeatures(degree=2, include_bias=False),
+                                                       cv=3)),
+                             ('xlearner', XLearner(models=reg(), cate_models=reg(), propensity_model=clf())),
+                             ('dalearner', DomainAdaptationLearner(models=reg(), final_models=reg(),
+                                                                   propensity_model=clf())),
+                             ('slearner', SLearner(overall_model=reg())),
+                             ('tlearner', TLearner(models=reg())),
+                             ('drlearner', DRLearner(model_propensity=clf(), model_regression=reg(),
+                                                     model_final=reg(), cv=3)),
+                             ('rlearner', NonParamDML(model_y=reg(), model_t=clf(), model_final=reg(),
+                                                      discrete_treatment=True, cv=3)),
+                             ('dml3dlasso', DML(model_y=reg(), model_t=clf(), model_final=reg(),
+                                                discrete_treatment=True, featurizer=PolynomialFeatures(degree=3),
+                                                cv=3))
+                             ]
+
+               (X_train, X_val, T_train, T_val,
+                Y_train, Y_val, _, true_eff_val) = train_test_split(X, T, y, true_eff, test_size=.4)
+
+               models = Parallel(n_jobs=1, verbose=1)(delayed(_fit_model)(name, mdl,
+                                                                          Y_train, T_train, X_train)
+                                                      for name, mdl in models)
+
+               if discrete_outcome:
+                   scorer = RScorer(model_y=clf(), model_t=clf(),
+                                    discrete_treatment=True, discrete_outcome=discrete_outcome,
+                                    cv=3, mc_iters=2, mc_agg='median')
+               else:
+                   scorer = RScorer(model_y=reg(), model_t=clf(),
+                                    discrete_treatment=True, cv=3,
+                                    mc_iters=2, mc_agg='median')
+
+               scorer.fit(Y_val, T_val, X=X_val)
+               rscore = [scorer.score(mdl) for _, mdl in models]
+               rootpehe_score = [np.sqrt(np.mean((true_eff_val.flatten() - mdl.effect(X_val).flatten())**2))
+                                 for _, mdl in models]
+               # Check for a negative correlation between the R-score and root-PEHE
+               # (precision in estimating heterogeneous effects)
+               assert LinearRegression().fit(np.array(rscore).reshape(-1, 1), np.array(rootpehe_score)).coef_ < 0.5
+
+               if discrete_outcome:
+                   # Drop SLearner; it is only included to introduce variation in the R- and root-PEHE scores
+                   models = [m for m in models if m[0] != 'slearner']
+               mdl, _ = scorer.best_model([mdl for _, mdl in models])
+               rootpehe_best = np.sqrt(np.mean((true_eff_val.flatten() - mdl.effect(X_val).flatten())**2))
+               # Check that best-model selection behaves as intended
+               assert rootpehe_best < 1.5 * np.min(rootpehe_score) + 0.05
+               mdl, _ = scorer.ensemble([mdl for _, mdl in models])
+               rootpehe_ensemble = np.sqrt(np.mean((true_eff_val.flatten() - mdl.effect(X_val).flatten())**2))
+               # Check that CATE ensembling behaves as intended
+               assert rootpehe_ensemble < 1.5 * np.min(rootpehe_score) + 0.05
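
For reference, a way to run just this test locally (assuming a source checkout with econml installed):

python -m unittest econml.tests.test_rscorer.TestRScorer.test_comparison -v

Using the unittest runner here has the advantage that self.subTest failures are reported per case ("continuous_outcome" vs "discrete_outcome").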