Add DML IV #218

Merged · 9 commits · Feb 18, 2020
3 changes: 2 additions & 1 deletion doc/reference.rst
@@ -4,6 +4,7 @@ Public Module Reference
.. autosummary::
   :toctree: _autosummary

+  econml.automated_ml
   econml.bootstrap
   econml.cate_estimator
   econml.cate_interpreter
@@ -15,9 +16,9 @@ Public Module Reference
   econml.inference
   econml.ortho_forest
   econml.metalearners
+  econml.ortho_iv
   econml.two_stage_least_squares
   econml.utilities
-  econml.automated_ml

Private Module Reference
========================
101 changes: 72 additions & 29 deletions econml/_ortho_learner.py
@@ -55,12 +55,13 @@ def _crossfit(model, folds, *args, **kwargs):
        function estimates a model of the nuisance function, based on the input
        data to fit. Predict evaluates the fitted nuisance function on the input
        data to predict.
-   folds : list of tuples
+   folds : list of tuples or None
        The crossfitting fold structure. Every entry in the list is a tuple whose
        first element contains the training indices of the args and kwargs data and
        whose second entry contains the test indices. If the union of the test indices
        is not the full set of all indices, then the remaining nuisance parameters
-       for the missing indices have value NaN.
+       for the missing indices have value NaN. If folds is None, then cross fitting
+       is not performed; all indices are used for both model fitting and prediction.
    args : a sequence of (numpy matrices or None)
        Each matrix is a data variable whose first index corresponds to a sample
    kwargs : a sequence of key-value args, with values being (numpy matrices or None)
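
To make the new None behavior concrete, here is a minimal sketch of the skip-crossfitting path (the Wrapper class and the data are illustrative, not part of this PR): a single clone of the nuisance model is fit on all rows, its predictions are returned as the nuisances, and fitted_inds spans every index.

    import numpy as np
    from sklearn.linear_model import LinearRegression
    from econml._ortho_learner import _crossfit

    class Wrapper:
        # illustrative nuisance wrapper exposing the fit/predict pair _crossfit expects
        def __init__(self, model):
            self._model = model

        def fit(self, X, y):
            self._model.fit(X, y)
            return self

        def predict(self, X, y):
            return self._model.predict(X)  # the "nuisance" is just the in-sample prediction

    X = np.random.normal(size=(100, 3))
    y = X[:, 0] + np.random.normal(size=(100,))
    # folds=None means no crossfitting: one model, fit and evaluated on all 100 rows
    nuisances, models, fitted_inds = _crossfit(Wrapper(LinearRegression()), None, X, y)
    assert len(models) == 1 and list(fitted_inds) == list(range(100))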
@@ -120,6 +121,19 @@ def predict(self, X, y, W=None):
"""
model_list = []
fitted_inds = []

if folds is None: # skip crossfitting
model_list.append(clone(model, safe=False))
kwargs = {k: v for k, v in kwargs.items() if v is not None}
model_list[0].fit(*args, **kwargs)
nuisances = model_list[0].predict(*args, **kwargs)

if not isinstance(nuisances, tuple):
nuisances = (nuisances,)

first_arr = args[0] if args else kwargs.items()[0][1]
return nuisances, model_list, range(first_arr.shape[0])

for idx, (train_idxs, test_idxs) in enumerate(folds):
model_list.append(clone(model, safe=False))
if len(np.intersect1d(train_idxs, test_idxs)) > 0:
@@ -129,18 +143,11 @@ def predict(self, X, y, W=None):
            raise AttributeError("Invalid crossfitting fold structure. The same index appears in two test folds.")
        fitted_inds = np.concatenate((fitted_inds, test_idxs))

-       args_train = ()
-       args_test = ()
-       for var in args:
-           args_train += (var[train_idxs],) if var is not None else (None,)
-           args_test += (var[test_idxs],) if var is not None else (None,)
+       args_train = tuple(var[train_idxs] if var is not None else None for var in args)
+       args_test = tuple(var[test_idxs] if var is not None else None for var in args)

-       kwargs_train = {}
-       kwargs_test = {}
-       for key, var in kwargs.items():
-           if var is not None:
-               kwargs_train[key] = var[train_idxs]
-               kwargs_test[key] = var[test_idxs]
+       kwargs_train = {key: var[train_idxs] for key, var in kwargs.items() if var is not None}
+       kwargs_test = {key: var[test_idxs] for key, var in kwargs.items() if var is not None}

        model_list[idx].fit(*args_train, **kwargs_train)
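
The comprehensions above are a pure refactor of the deleted loops: positional args that are None are passed through as None placeholders, while keyword args that are None are dropped entirely. A standalone sketch of the slicing behavior (shapes made up):

    import numpy as np

    args = (np.arange(10).reshape(-1, 1), None)  # e.g. (Y, X) where X was not supplied
    train_idxs = np.array([0, 1, 2, 3, 4])
    args_train = tuple(var[train_idxs] if var is not None else None for var in args)
    # -> (array([[0], [1], [2], [3], [4]]), None): the None placeholder survives the split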
@@ -246,6 +253,9 @@ class _OrthoLearner(TreatmentExpansionMixin, LinearCateEstimator):
    discrete_treatment: bool
        Whether the treatment values should be treated as categorical, rather than continuous, quantities

+   discrete_instrument: bool
+       Whether the instrument values should be treated as categorical, rather than continuous, quantities
+
    n_splits: int, cross-validation generator or an iterable
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:
@@ -310,7 +320,8 @@ def score(self, Y, T, W=None, nuisances=None):
    y = X[:, 0] + X[:, 1] + np.random.normal(0, 0.1, size=(100,))
    est = _OrthoLearner(ModelNuisance(LinearRegression(), LinearRegression()),
                        ModelFinal(),
-                       n_splits=2, discrete_treatment=False, random_state=None)
+                       n_splits=2, discrete_treatment=False, discrete_instrument=False,
+                       random_state=None)
    est.fit(y, X[:, 0], W=X[:, 1:])
>>> est.score_
@@ -372,7 +383,8 @@ def score(self, Y, T, W=None, nuisances=None):
    T = np.random.binomial(1, scipy.special.expit(W[:, 0]))
    y = T + W[:, 0] + np.random.normal(0, 0.01, size=(100,))
    est = _OrthoLearner(ModelNuisance(LogisticRegression(solver='lbfgs'), LinearRegression()),
-                       ModelFinal(), n_splits=2, discrete_treatment=True, random_state=None)
+                       ModelFinal(), n_splits=2, discrete_treatment=True, discrete_instrument=False,
+                       random_state=None)
    est.fit(y, T, W=W)

>>> est.score_
@@ -399,13 +411,14 @@ def score(self, Y, T, W=None, nuisances=None):
of the final CATE model.
"""

-   def __init__(self, model_nuisance, model_final,
-                discrete_treatment, n_splits, random_state):
+   def __init__(self, model_nuisance, model_final, *,
+                discrete_treatment, discrete_instrument, n_splits, random_state):
        self._model_nuisance = clone(model_nuisance, safe=False)
        self._models_nuisance = None
        self._model_final = clone(model_final, safe=False)
        self._n_splits = n_splits
        self._discrete_treatment = discrete_treatment
+       self._discrete_instrument = discrete_instrument
        self._random_state = check_random_state(random_state)
        if discrete_treatment:
            self._label_encoder = LabelEncoder()
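
Note the bare * added to the signature: everything after it is now keyword-only, so callers must name discrete_treatment, discrete_instrument, n_splits, and random_state explicitly. A sketch of the calling convention (nuisance and final stand in for the ModelNuisance/ModelFinal wrappers from the docstring examples above):

    est = _OrthoLearner(nuisance, final,
                        discrete_treatment=False, discrete_instrument=False,
                        n_splits=2, random_state=None)       # OK: flags passed by name
    # _OrthoLearner(nuisance, final, False, False, 2, None)  # TypeError under the new signature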
@@ -490,23 +503,51 @@ def fit(self, Y, T, X=None, W=None, Z=None, sample_weight=None, sample_var=None,

    def _fit_nuisances(self, Y, T, X=None, W=None, Z=None, sample_weight=None):
        # use a binary array to get stratified split in case of discrete treatment
-       splitter = check_cv(self._n_splits, [0], classifier=self._discrete_treatment)
-       # if check_cv produced a new KFold or StratifiedKFold object, we need to set shuffle and random_state
-       if splitter != self._n_splits and isinstance(splitter, (KFold, StratifiedKFold)):
-           splitter.shuffle = True
-           splitter.random_state = self._random_state
-
-       all_vars = [var if np.ndim(var) == 2 else var.reshape(-1, 1) for var in [Z, W, X] if var is not None]
-       if all_vars:
-           all_vars = np.hstack(all_vars)
-           folds = splitter.split(all_vars, T)
-       else:
-           folds = splitter.split(np.ones((T.shape[0], 1)), T)
+       stratify = self._discrete_treatment or self._discrete_instrument

        if self._discrete_treatment:
            T = self._label_encoder.fit_transform(T.ravel())

+       if self._discrete_instrument:
+           z_enc = LabelEncoder()
+           Z = z_enc.fit_transform(Z.ravel())
+
+           if self._discrete_treatment:  # need to stratify on combination of Z and T
+               to_split = T + Z * len(self._label_encoder.classes_)
+           else:
+               to_split = Z  # just stratify on Z
+
+           z_ohe = OneHotEncoder(categories='auto', sparse=False)
+           Z = z_ohe.fit_transform(reshape(Z, (-1, 1)))[:, 1:]
+           self.z_transformer = FunctionTransformer(
+               func=(lambda Z:
+                     z_ohe.transform(
+                         reshape(z_enc.transform(Z.ravel()), (-1, 1)))[:, 1:]),
+               validate=False)
+       else:
+           to_split = T  # stratify on T if discrete, and fine to pass T as second arg to KFold.split even when not
+           self.z_transformer = None
+
+       if self._n_splits == 1:  # special case, no cross validation
+           folds = None
+       else:
+           splitter = check_cv(self._n_splits, [0], classifier=stratify)
+           # if check_cv produced a new KFold or StratifiedKFold object, we need to set shuffle and random_state
+           if splitter != self._n_splits and isinstance(splitter, (KFold, StratifiedKFold)):
+               splitter.shuffle = True
+               splitter.random_state = self._random_state
+
+           all_vars = [var if np.ndim(var) == 2 else var.reshape(-1, 1) for var in [Z, W, X] if var is not None]
+           if all_vars:
+               all_vars = np.hstack(all_vars)
+               folds = splitter.split(all_vars, to_split)
+           else:
+               folds = splitter.split(np.ones((T.shape[0], 1)), to_split)
+
        if self._discrete_treatment:
            # drop first column since all columns sum to one
            T = self._one_hot_encoder.fit_transform(reshape(T, (-1, 1)))[:, 1:]

        self._d_t = shape(T)[1:]
        self.transformer = FunctionTransformer(
            func=(lambda T:
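
The combined label to_split = T + Z * len(classes) assigns each (treatment, instrument) pair a distinct integer, so StratifiedKFold preserves the joint distribution of T and Z across folds instead of stratifying on only one of them. A self-contained illustration (data made up):

    import numpy as np
    from sklearn.model_selection import StratifiedKFold

    T = np.array([0, 1, 0, 1, 0, 1, 0, 1])  # label-encoded treatment (2 classes)
    Z = np.array([0, 0, 1, 1, 0, 0, 1, 1])  # label-encoded instrument (2 classes)
    to_split = T + Z * 2                    # distinct label per (T, Z) cell: 0, 1, 2, 3
    for train, test in StratifiedKFold(n_splits=2).split(np.ones((8, 1)), to_split):
        print(np.bincount(to_split[test], minlength=4))  # each test fold keeps one of every cell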
@@ -592,6 +633,8 @@ def score(self, Y, T, X=None, W=None, Z=None):
        self._check_fitted_dims(X)
        self._check_fitted_dims_w_z(W, Z)
        X, T = self._expand_treatments(X, T)
+       if self.z_transformer is not None:
+           Z = self.z_transformer.transform(Z)
        n_splits = len(self._models_nuisance)
        for idx, mdl in enumerate(self._models_nuisance):
            nuisance_temp = mdl.predict(Y, T, **self._filter_none_kwargs(X=X, W=W, Z=Z))
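
The z_transformer built in _fit_nuisances composes the fitted LabelEncoder and OneHotEncoder and, like the treatment encoding, drops the first indicator column: the one-hot columns sum to one, so the dropped category becomes the implicit baseline. A minimal sketch of that encoding (instrument values made up; mirrors the calls in the diff):

    import numpy as np
    from sklearn.preprocessing import LabelEncoder, OneHotEncoder

    Z = np.array(['a', 'b', 'c', 'a'])
    z_enc = LabelEncoder().fit(Z)
    z_ohe = OneHotEncoder(categories='auto', sparse=False).fit(
        z_enc.transform(Z).reshape(-1, 1))
    Z_enc = z_ohe.transform(z_enc.transform(Z).reshape(-1, 1))[:, 1:]
    # shape (4, 2): indicator columns for 'b' and 'c'; 'a' is the implicit baseline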
22 changes: 8 additions & 14 deletions econml/_rlearner.py
@@ -28,19 +28,9 @@
import numpy as np
import copy
from warnings import warn
-from .utilities import (shape, reshape, ndim, hstack, cross_product, transpose,
-                        broadcast_unit_treatments, reshape_treatmentwise_effects,
-                        StatsModelsLinearRegression, LassoCVWrapper)
-from sklearn.model_selection import KFold, StratifiedKFold, check_cv
-from sklearn.linear_model import LinearRegression, LassoCV
-from sklearn.preprocessing import (PolynomialFeatures, LabelEncoder, OneHotEncoder,
-                                   FunctionTransformer)
-from sklearn.base import clone, TransformerMixin
-from sklearn.pipeline import Pipeline
-from sklearn.utils import check_random_state
-from .cate_estimator import (BaseCateEstimator, LinearCateEstimator,
-                             TreatmentExpansionMixin, StatsModelsCateEstimatorMixin)
-from .inference import StatsModelsInference
+from .utilities import (shape, reshape, ndim, hstack)
+from sklearn.linear_model import LinearRegression
+from sklearn.base import clone
from ._ortho_learner import _OrthoLearner


@@ -270,7 +260,11 @@ def score(self, Y, T, X=None, W=None, Z=None, nuisances=None, sample_weight=None
            return np.mean((Y_res - Y_res_pred)**2)

        super().__init__(ModelNuisance(model_y, model_t),
-                        ModelFinal(model_final), discrete_treatment, n_splits, random_state)
+                        ModelFinal(model_final),
+                        discrete_treatment=discrete_treatment,
+                        discrete_instrument=False,  # no instrument, so doesn't matter
+                        n_splits=n_splits,
+                        random_state=random_state)

    def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, inference=None):
        """
1 change: 1 addition & 0 deletions econml/drlearner.py
@@ -348,6 +348,7 @@ def score(self, Y, T, X=None, W=None, *, nuisances, sample_weight=None, sample_v
        super().__init__(ModelNuisance(model_propensity, model_regression),
                         ModelFinal(model_final, featurizer, multitask_model_final),
                         n_splits=n_splits, discrete_treatment=True,
+                        discrete_instrument=False,  # no instrument, so doesn't matter
                         random_state=random_state)

    def fit(self, Y, T, X=None, W=None, *, sample_weight=None, sample_var=None, inference=None):
2 changes: 2 additions & 0 deletions econml/inference.py
@@ -326,6 +326,8 @@ def __init__(self, cov_type='HC1'):

    def prefit(self, estimator, *args, **kwargs):
        super().prefit(estimator, *args, **kwargs)
+       assert not (self.model_final.fit_intercept), ("Inference can only be performed on models linear in "
+                                                     "their features, but here fit_intercept is True")
        self.model_final.cov_type = self.cov_type

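The new assert guards the assumption behind this inference class: coefficient covariances are computed for a model that is linear in its features, so the final model must not fit its own intercept; if an intercept is wanted, it has to enter as an explicit feature column. A sketch of the guarded configuration (model choice illustrative):

    from sklearn.linear_model import LinearRegression

    model_final = LinearRegression(fit_intercept=False)  # passes the new prefit assert
    # model_final = LinearRegression()                   # fit_intercept=True would trip it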
