carl.learning module
This module implements machine learning algorithms and utilities, complementary to Scikit-Learn.
""" This module implements machine learning algorithms and utilities, complementary to Scikit-Learn. """ # Carl is free software; you can redistribute it and/or modify it # under the terms of the Revised BSD License; see LICENSE file for # more details. from .base import as_classifier from .base import check_cv from .calibration import CalibratedClassifierCV from .parameterize import make_parameterized_classification from .parameterize import ParameterStacker from .parameterize import ParameterizedClassifier from .parameterize import ParameterizedRegressor __all__ = ("as_classifier", "check_cv", "CalibratedClassifierCV", "make_parameterized_classification", "ParameterStacker", "ParameterizedClassifier", "ParameterizedRegressor",)
Functions
def as_classifier(regressor)
Wrap a Scikit-Learn regressor into a binary classifier.
This function can be used to solve a binary classification problem as a
regression problem, where output labels {0,1} are treated as real values.
The wrapped regressor exhibits the classifier API, with the corresponding `predict`, `predict_proba` and `score` methods.
Parameters
- `regressor` [`RegressorMixin`]: The regressor object.

Returns
- `clf` [`ClassifierMixin`]: The wrapped regressor, but with a classifier API.
def as_classifier(regressor):
    """Wrap a Scikit-Learn regressor into a binary classifier.

    This function can be used to solve a binary classification problem as a
    regression problem, where output labels {0,1} are treated as real values.

    The wrapped regressor exhibits the classifier API, with the corresponding
    `predict`, `predict_proba` and `score` methods.

    Parameters
    ----------
    * `regressor` [`RegressorMixin`]:
        The regressor object.

    Returns
    -------
    * `clf` [`ClassifierMixin`]:
        The wrapped regressor, but with a classifier API.
    """
    class Wrapper(BaseEstimator, ClassifierMixin):
        def __init__(self, base_estimator):
            self.base_estimator = base_estimator

        def fit(self, X, y, **kwargs):
            # Check inputs
            X, y = check_X_y(X, y)

            # Convert y
            label_encoder = LabelEncoder()
            y = label_encoder.fit_transform(y).astype(float)

            if len(label_encoder.classes_) != 2:
                raise ValueError("y must contain exactly two classes.")

            self.classes_ = label_encoder.classes_

            # Fit regressor on the 0/1-encoded labels
            self.regressor_ = clone(self.base_estimator).fit(X, y, **kwargs)

            return self

        def predict(self, X):
            return np.where(self.predict_proba(X)[:, 1] >= 0.5,
                            self.classes_[1],
                            self.classes_[0])

        def predict_proba(self, X):
            X = check_array(X)

            # Clip raw regression outputs into [0, 1] probabilities
            df = self.regressor_.predict(X)
            df = np.clip(df, 0., 1.)
            probas = np.zeros((len(X), 2))
            probas[:, 0] = 1. - df
            probas[:, 1] = df

            return probas

        def score(self, X, y):
            return self.regressor_.score(X, y)

    return Wrapper(regressor)
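For illustration, a minimal usage sketch; `GradientBoostingRegressor` here is an ordinary scikit-learn estimator, not part of carl:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingRegressor
from carl.learning import as_classifier

X, y = make_classification(n_samples=1000, random_state=0)

# The wrapped regressor exposes predict, predict_proba and score.
clf = as_classifier(GradientBoostingRegressor())
clf.fit(X, y)

probas = clf.predict_proba(X)  # shape (1000, 2); outputs clipped to [0, 1]
labels = clf.predict(X)        # thresholded at p(y=1|x) >= 0.5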
def check_cv(cv=3, X=None, y=None, classifier=False)
Input checker utility for building a cross-validator.
Parameters
- `cv` [integer, cross-validation generator or an iterable, default=`3`]: Determines the cross-validation splitting strategy. Possible inputs for cv are:
  - integer, to specify the number of folds.
  - An object to be used as a cross-validation generator.
  - An iterable yielding train/test splits.
  For integer/None inputs, if classifier is True and `y` is either binary or multiclass, `StratifiedKFold` is used. In all other cases, `KFold` is used.
- `y` [array-like, optional]: The target variable for supervised learning problems.
- `classifier` [boolean, default=`False`]: Whether the task is a classification task, in which case `StratifiedKFold` will be used.

Returns
- `checked_cv` [a cross-validator instance]: The return value is a cross-validator which generates the train/test splits via the `split` method.
Note
This method is backported from scikit-learn 0.18.
def check_cv(cv=3, X=None, y=None, classifier=False):
    """Input checker utility for building a cross-validator.

    Parameters
    ----------
    * `cv` [integer, cross-validation generator or an iterable, default=`3`]:
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:
          - integer, to specify the number of folds.
          - An object to be used as a cross-validation generator.
          - An iterable yielding train/test splits.

        For integer/None inputs, if classifier is True and `y` is either
        binary or multiclass, `StratifiedKFold` is used. In all other cases,
        `KFold` is used.

    * `y` [array-like, optional]:
        The target variable for supervised learning problems.

    * `classifier` [boolean, default=`False`]:
        Whether the task is a classification task, in which case
        `StratifiedKFold` will be used.

    Returns
    -------
    * `checked_cv` [a cross-validator instance]:
        The return value is a cross-validator which generates the train/test
        splits via the `split` method.

    Note
    ----
    This method is backported from scikit-learn 0.18.
    """
    return sklearn_check_cv(cv, y=y, classifier=classifier)
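A short sketch of the expansion behavior; since `classifier=True` and `y` is binary, the integer input below is expanded into a `StratifiedKFold`:

import numpy as np
from carl.learning import check_cv

X = np.arange(20).reshape(10, 2)
y = np.array([0, 1] * 5)

# An integer becomes a (Stratified)KFold cross-validator.
cv = check_cv(3, X=X, y=y, classifier=True)

for train, test in cv.split(X, y):
    print(train, test)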
def make_parameterized_classification(p0, p1, n_samples, params, random_state=None)
Generate parameterized classification data.
This function generates parameterized classification data, by enumerating all possible combinations of provided parameter values and producing samples in equal number from `p0` and `p1`.
Parameters
- `p0` [`DistributionMixin`]: The distribution to draw class 0 samples from.
- `p1` [`DistributionMixin`]: The distribution to draw class 1 samples from.
- `n_samples` [integer]: The total number of samples to generate.
- `params` [list of pairs (theano shared variable, list of values) or list of theano shared variables]: The list of parameters and the corresponding values to generate samples for. If only a list of theano shared variables is given, then generate samples using the current parameter values.
- `random_state` [integer or RandomState object]: The random seed.

Returns
- `X` [array, shape=(n_samples, n_features+len(params))]: The generated training data, as sample features and concatenated parameter values.
- `y` [array, shape=(n_samples,)]: The labels.
def make_parameterized_classification(p0, p1, n_samples, params,
                                      random_state=None):
    """Generate parameterized classification data.

    This function generates parameterized classification data, by
    enumerating all possible combinations of provided parameter values and
    producing samples in equal number from `p0` and `p1`.

    Parameters
    ----------
    * `p0` [`DistributionMixin`]:
        The distribution to draw class 0 samples from.

    * `p1` [`DistributionMixin`]:
        The distribution to draw class 1 samples from.

    * `n_samples` [integer]:
        The total number of samples to generate.

    * `params` [list of pairs (theano shared variable, list of values) or
                list of theano shared variables]:
        The list of parameters and the corresponding values to generate
        samples for. If only a list of theano shared variables is given,
        then generate samples using the current parameter values.

    * `random_state` [integer or RandomState object]:
        The random seed.

    Returns
    -------
    * `X` [array, shape=(n_samples, n_features+len(params))]:
        The generated training data, as sample features and concatenated
        parameter values.

    * `y` [array, shape=(n_samples,)]:
        The labels.
    """
    rng = check_random_state(random_state)

    if not isinstance(params[0], tuple):
        # Base case: sample at the current parameter values and stack them.
        X0 = p0.rvs(n_samples // 2, random_state=rng)
        X1 = p1.rvs(n_samples - (n_samples // 2), random_state=rng)
        X = ParameterStacker(params).transform(np.vstack((X0, X1)))
        y = np.zeros(n_samples)
        y[len(X0):] = 1

        return X, y

    else:
        # Enumerate all combinations of the provided parameter values and
        # recurse once per combination.
        combinations = list(product(*[values for _, values in params]))

        all_X = []
        all_y = []

        for c in combinations:
            for i, v in enumerate(c):
                params[i][0].set_value(v)

            X, y = make_parameterized_classification(
                p0, p1,
                n_samples // len(combinations),
                [p for p, _ in params],
                random_state=rng)

            all_X.append(X)
            all_y.append(y)

        X = np.vstack(all_X)
        y = np.concatenate(all_y)

        return X, y
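A hedged usage sketch; it assumes `carl.distributions.Normal`, a distribution whose parameters can be Theano shared variables:

import theano
from carl.distributions import Normal
from carl.learning import make_parameterized_classification

# mu1 is a Theano shared variable acting as the tunable parameter.
mu1 = theano.shared(1.0, name="mu1")

p0 = Normal(mu=0.0, sigma=1.0)
p1 = Normal(mu=mu1, sigma=1.0)

# Samples are generated for every listed value of mu1; the value used
# for each sample is appended to X as an extra column.
X, y = make_parameterized_classification(
    p0, p1, n_samples=12000,
    params=[(mu1, [0.5, 1.0, 1.5])],
    random_state=0)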
Classes
class CalibratedClassifierCV
Probability calibration.
With this class, the `base_estimator` is fit on the train set of the cross-validation generator and the test set is used for calibration. The probabilities for each of the folds are then averaged for prediction.
class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
    """Probability calibration.

    With this class, the `base_estimator` is fit on the train set of the
    cross-validation generator and the test set is used for calibration.
    The probabilities for each of the folds are then averaged for
    prediction.
    """

    def __init__(self, base_estimator, method="histogram", bins="auto",
                 interpolation=None, variable_width=False, cv=1):
        """Constructor.

        Parameters
        ----------
        * `base_estimator` [`ClassifierMixin`]:
            The classifier whose output decision function needs to be
            calibrated to offer more accurate predict_proba outputs. If
            `cv="prefit"`, the classifier must have been fit already on data.

        * `method` [string]:
            The method to use for calibration. Supported methods include
            `"histogram"`, `"kde"`, `"isotonic"`, `"interpolated-isotonic"`
            and `"sigmoid"`.

        * `bins` [int, default="auto"]:
            The number of bins, if `method` is `"histogram"`.

        * `interpolation` [string, optional]:
            Specifies the kind of interpolation between bins as a string
            (`"linear"`, `"nearest"`, `"zero"`, `"slinear"`, `"quadratic"`,
            `"cubic"`), if `method` is `"histogram"`.

        * `variable_width` [boolean, optional]:
            If True, use equal-probability variable-width bins, if `method`
            is `"histogram"`.

        * `cv` [integer, cross-validation generator, iterable or `"prefit"`]:
            Determines the cross-validation splitting strategy.
            Possible inputs for cv are:
              - integer, to specify the number of folds.
              - An object to be used as a cross-validation generator.
              - An iterable yielding train/test splits.

            If `"prefit"` is passed, it is assumed that base_estimator has
            been fitted already and all data is used for calibration. If
            `cv=1`, the training data is used for both training and
            calibration.
        """
        self.base_estimator = base_estimator
        self.method = method
        self.bins = bins
        self.interpolation = interpolation
        self.variable_width = variable_width
        self.cv = cv

    def fit(self, X, y, sample_weight=None):
        """Fit the calibrated model.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            Training data.

        * `y` [array-like, shape=(n_samples,)]:
            Target values.

        Returns
        -------
        * `self` [object]:
            `self`.
        """
        # Check inputs
        X, y = check_X_y(X, y)

        # Convert y
        label_encoder = LabelEncoder()
        y = label_encoder.fit_transform(y).astype(float)

        if len(label_encoder.classes_) != 2:
            raise ValueError("y must contain exactly two classes.")

        self.classes_ = label_encoder.classes_

        # Calibrator
        if self.method == "histogram":
            base_calibrator = HistogramCalibrator(
                bins=self.bins, interpolation=self.interpolation,
                variable_width=self.variable_width)
        elif self.method == "kde":
            base_calibrator = KernelDensityCalibrator()
        elif self.method == "isotonic":
            base_calibrator = IsotonicCalibrator()
        elif self.method == "interpolated-isotonic":
            base_calibrator = IsotonicCalibrator(interpolation=True)
        elif self.method == "sigmoid":
            base_calibrator = SigmoidCalibrator()
        else:
            base_calibrator = self.method

        # Fit
        if self.cv == "prefit" or self.cv == 1:
            # Classifier
            if self.cv == 1:
                clf = clone(self.base_estimator)

                if isinstance(clf, RegressorMixin):
                    clf = as_classifier(clf)

                if sample_weight is None:
                    clf.fit(X, y)
                else:
                    clf.fit(X, y, sample_weight=sample_weight)

            else:
                clf = self.base_estimator

            self.classifiers_ = [clf]

            # Calibrator
            calibrator = clone(base_calibrator)
            T = clf.predict_proba(X)[:, 1]

            if sample_weight is None:
                calibrator.fit(T, y)
            else:
                calibrator.fit(T, y, sample_weight=sample_weight)

            self.calibrators_ = [calibrator]

        else:
            self.classifiers_ = []
            self.calibrators_ = []

            cv = check_cv(self.cv, X=X, y=y, classifier=True)

            for train, calibrate in cv.split(X, y):
                # Classifier, trained on the train split of each fold
                clf = clone(self.base_estimator)

                if isinstance(clf, RegressorMixin):
                    clf = as_classifier(clf)

                if sample_weight is None:
                    clf.fit(X[train], y[train])
                else:
                    clf.fit(X[train], y[train],
                            sample_weight=sample_weight[train])

                self.classifiers_.append(clf)

                # Calibrator, fit on the held-out split of each fold
                calibrator = clone(base_calibrator)
                T = clf.predict_proba(X[calibrate])[:, 1]

                if sample_weight is None:
                    calibrator.fit(T, y[calibrate])
                else:
                    calibrator.fit(T, y[calibrate],
                                   sample_weight=sample_weight[calibrate])

                self.calibrators_.append(calibrator)

        return self

    def predict(self, X):
        """Predict the targets for `X`.

        Can be different from the predictions of the uncalibrated classifier.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.

        Returns
        -------
        * `y` [array, shape=(n_samples,)]:
            The predicted class.
        """
        return np.where(self.predict_proba(X)[:, 1] >= 0.5,
                        self.classes_[1],
                        self.classes_[0])

    def predict_proba(self, X):
        """Predict the posterior probabilities of classification for `X`.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.

        Returns
        -------
        * `probas` [array, shape=(n_samples, n_classes)]:
            The predicted probabilities.
        """
        p = np.zeros((len(X), 2))

        # Average the calibrated outputs over all folds
        for clf, calibrator in zip(self.classifiers_, self.calibrators_):
            p[:, 1] += calibrator.predict(clf.predict_proba(X)[:, 1])

        p[:, 1] /= len(self.classifiers_)
        p[:, 0] = 1. - p[:, 1]

        return p

    def _clone(self):
        estimator = clone(self, original=True)
        if self.cv == "prefit":
            estimator.base_estimator = self.base_estimator

        return estimator
Ancestors (in MRO)
- CalibratedClassifierCV
- sklearn.base.BaseEstimator
- sklearn.base.ClassifierMixin
- builtins.object
Static methods
def __init__(self, base_estimator, method='histogram', bins='auto', interpolation=None, variable_width=False, cv=1)
Constructor.
Parameters
- `base_estimator` [`ClassifierMixin`]: The classifier whose output decision function needs to be calibrated to offer more accurate predict_proba outputs. If `cv="prefit"`, the classifier must have been fit already on data.
- `method` [string]: The method to use for calibration. Supported methods include `"histogram"`, `"kde"`, `"isotonic"`, `"interpolated-isotonic"` and `"sigmoid"`.
- `bins` [int, default="auto"]: The number of bins, if `method` is `"histogram"`.
- `interpolation` [string, optional]: Specifies the kind of interpolation between bins as a string (`"linear"`, `"nearest"`, `"zero"`, `"slinear"`, `"quadratic"`, `"cubic"`), if `method` is `"histogram"`.
- `variable_width` [boolean, optional]: If True, use equal-probability variable-width bins, if `method` is `"histogram"`.
- `cv` [integer, cross-validation generator, iterable or `"prefit"`]: Determines the cross-validation splitting strategy. Possible inputs for cv are:
  - integer, to specify the number of folds.
  - An object to be used as a cross-validation generator.
  - An iterable yielding train/test splits.
  If `"prefit"` is passed, it is assumed that base_estimator has been fitted already and all data is used for calibration. If `cv=1`, the training data is used for both training and calibration. See the sketch after the source listing below for the `"prefit"` case.
def __init__(self, base_estimator, method="histogram", bins="auto",
             interpolation=None, variable_width=False, cv=1):
    """Constructor.

    Parameters
    ----------
    * `base_estimator` [`ClassifierMixin`]:
        The classifier whose output decision function needs to be
        calibrated to offer more accurate predict_proba outputs. If
        `cv="prefit"`, the classifier must have been fit already on data.

    * `method` [string]:
        The method to use for calibration. Supported methods include
        `"histogram"`, `"kde"`, `"isotonic"`, `"interpolated-isotonic"`
        and `"sigmoid"`.

    * `bins` [int, default="auto"]:
        The number of bins, if `method` is `"histogram"`.

    * `interpolation` [string, optional]:
        Specifies the kind of interpolation between bins as a string
        (`"linear"`, `"nearest"`, `"zero"`, `"slinear"`, `"quadratic"`,
        `"cubic"`), if `method` is `"histogram"`.

    * `variable_width` [boolean, optional]:
        If True, use equal-probability variable-width bins, if `method`
        is `"histogram"`.

    * `cv` [integer, cross-validation generator, iterable or `"prefit"`]:
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:
          - integer, to specify the number of folds.
          - An object to be used as a cross-validation generator.
          - An iterable yielding train/test splits.

        If `"prefit"` is passed, it is assumed that base_estimator has
        been fitted already and all data is used for calibration. If
        `cv=1`, the training data is used for both training and
        calibration.
    """
    self.base_estimator = base_estimator
    self.method = method
    self.bins = bins
    self.interpolation = interpolation
    self.variable_width = variable_width
    self.cv = cv
def fit(self, X, y, sample_weight=None)
Fit the calibrated model.
Parameters
- `X` [array-like, shape=(n_samples, n_features)]: Training data.
- `y` [array-like, shape=(n_samples,)]: Target values.

Returns
- `self` [object]: `self`.
def fit(self, X, y, sample_weight=None):
    """Fit the calibrated model.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        Training data.

    * `y` [array-like, shape=(n_samples,)]:
        Target values.

    Returns
    -------
    * `self` [object]:
        `self`.
    """
    # Check inputs
    X, y = check_X_y(X, y)

    # Convert y
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(y).astype(float)

    if len(label_encoder.classes_) != 2:
        raise ValueError("y must contain exactly two classes.")

    self.classes_ = label_encoder.classes_

    # Calibrator
    if self.method == "histogram":
        base_calibrator = HistogramCalibrator(
            bins=self.bins, interpolation=self.interpolation,
            variable_width=self.variable_width)
    elif self.method == "kde":
        base_calibrator = KernelDensityCalibrator()
    elif self.method == "isotonic":
        base_calibrator = IsotonicCalibrator()
    elif self.method == "interpolated-isotonic":
        base_calibrator = IsotonicCalibrator(interpolation=True)
    elif self.method == "sigmoid":
        base_calibrator = SigmoidCalibrator()
    else:
        base_calibrator = self.method

    # Fit
    if self.cv == "prefit" or self.cv == 1:
        # Classifier
        if self.cv == 1:
            clf = clone(self.base_estimator)

            if isinstance(clf, RegressorMixin):
                clf = as_classifier(clf)

            if sample_weight is None:
                clf.fit(X, y)
            else:
                clf.fit(X, y, sample_weight=sample_weight)

        else:
            clf = self.base_estimator

        self.classifiers_ = [clf]

        # Calibrator
        calibrator = clone(base_calibrator)
        T = clf.predict_proba(X)[:, 1]

        if sample_weight is None:
            calibrator.fit(T, y)
        else:
            calibrator.fit(T, y, sample_weight=sample_weight)

        self.calibrators_ = [calibrator]

    else:
        self.classifiers_ = []
        self.calibrators_ = []

        cv = check_cv(self.cv, X=X, y=y, classifier=True)

        for train, calibrate in cv.split(X, y):
            # Classifier, trained on the train split of each fold
            clf = clone(self.base_estimator)

            if isinstance(clf, RegressorMixin):
                clf = as_classifier(clf)

            if sample_weight is None:
                clf.fit(X[train], y[train])
            else:
                clf.fit(X[train], y[train],
                        sample_weight=sample_weight[train])

            self.classifiers_.append(clf)

            # Calibrator, fit on the held-out split of each fold
            calibrator = clone(base_calibrator)
            T = clf.predict_proba(X[calibrate])[:, 1]

            if sample_weight is None:
                calibrator.fit(T, y[calibrate])
            else:
                calibrator.fit(T, y[calibrate],
                               sample_weight=sample_weight[calibrate])

            self.calibrators_.append(calibrator)

    return self
def predict(self, X)
Predict the targets for `X`.
Can be different from the predictions of the uncalibrated classifier.
Parameters
- `X` [array-like, shape=(n_samples, n_features)]: The samples.

Returns
- `y` [array, shape=(n_samples,)]: The predicted class.
def predict(self, X):
    """Predict the targets for `X`.

    Can be different from the predictions of the uncalibrated classifier.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        The samples.

    Returns
    -------
    * `y` [array, shape=(n_samples,)]:
        The predicted class.
    """
    return np.where(self.predict_proba(X)[:, 1] >= 0.5,
                    self.classes_[1],
                    self.classes_[0])
def predict_proba(self, X)
Predict the posterior probabilities of classification for `X`.
Parameters
- `X` [array-like, shape=(n_samples, n_features)]: The samples.

Returns
- `probas` [array, shape=(n_samples, n_classes)]: The predicted probabilities.
def predict_proba(self, X):
    """Predict the posterior probabilities of classification for `X`.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        The samples.

    Returns
    -------
    * `probas` [array, shape=(n_samples, n_classes)]:
        The predicted probabilities.
    """
    p = np.zeros((len(X), 2))

    # Average the calibrated outputs over all folds
    for clf, calibrator in zip(self.classifiers_, self.calibrators_):
        p[:, 1] += calibrator.predict(clf.predict_proba(X)[:, 1])

    p[:, 1] /= len(self.classifiers_)
    p[:, 0] = 1. - p[:, 1]

    return p
Instance variables
var base_estimator
var bins
var cv
var interpolation
var method
var variable_width
class ParameterStacker
Stack current parameter values as additional features.
class ParameterStacker(BaseEstimator, TransformerMixin):
    """Stack current parameter values as additional features."""

    def __init__(self, params):
        """Constructor.

        Parameters
        ----------
        * `params` [list of Theano shared variables]:
            The parameters.
        """
        self.params = params

    def transform(self, X, y=None):
        """Stack current parameter values as additional features.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.

        Returns
        -------
        * `Xt` [array, shape=(n_samples, n_features+len(params))]:
            The horizontal concatenation of X with the current parameter
            values, added as new columns.
        """
        Xp = np.empty((len(X), len(self.params)))

        # Evaluate each shared variable and broadcast it as a new column
        for i, p in enumerate(self.params):
            Xp[:, i] = p.eval()

        return np.hstack((X, Xp))
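A small sketch with a single Theano shared variable:

import numpy as np
import theano
from carl.learning import ParameterStacker

theta = theano.shared(42.0, name="theta")
X = np.zeros((3, 2))

stacker = ParameterStacker(params=[theta])
Xt = stacker.transform(X)
# Xt has shape (3, 3); the last column holds the current value of theta.

theta.set_value(-1.0)
Xt = stacker.transform(X)  # the new column now contains -1.0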
Ancestors (in MRO)
- ParameterStacker
- sklearn.base.BaseEstimator
- sklearn.base.TransformerMixin
- builtins.object
Static methods
def __init__(self, params)
Constructor.
Parameters
- `params` [list of Theano shared variables]: The parameters.
def __init__(self, params):
    """Constructor.

    Parameters
    ----------
    * `params` [list of Theano shared variables]:
        The parameters.
    """
    self.params = params
def transform(self, X, y=None)
Stack current parameter values as additional features.
Parameters
- `X` [array-like, shape=(n_samples, n_features)]: The samples.

Returns
- `Xt` [array, shape=(n_samples, n_features+len(params))]: The horizontal concatenation of X with the current parameter values, added as new columns.
def transform(self, X, y=None):
    """Stack current parameter values as additional features.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        The samples.

    Returns
    -------
    * `Xt` [array, shape=(n_samples, n_features+len(params))]:
        The horizontal concatenation of X with the current parameter
        values, added as new columns.
    """
    Xp = np.empty((len(X), len(self.params)))

    # Evaluate each shared variable and broadcast it as a new column
    for i, p in enumerate(self.params):
        Xp[:, i] = p.eval()

    return np.hstack((X, Xp))
Instance variables
var params
class ParameterizedClassifier
Parameterize a Scikit-Learn classifier.
This wrapper can be used to learn a parameterized classification problem, where parameter values are automatically added as additional features.
class ParameterizedClassifier(_ParameterizedEstimator, ClassifierMixin):
    """Parameterize a Scikit-Learn classifier.

    This wrapper can be used to learn a parameterized classification
    problem, where parameter values are automatically added as additional
    features.
    """

    def predict_proba(self, X):
        """Predict the posterior probabilities of classification for `X`.

        Parameter values are automatically appended from the current state
        of the parameters if those are not provided with `X`.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features) or
               shape=(n_samples, n_features+len(params))]:
            The samples.

        Returns
        -------
        * `probas` [array, shape=(n_samples, n_classes)]:
            The predicted probabilities.
        """
        return self.estimator_.predict_proba(self._validate_X(X))
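A usage sketch, again assuming `carl.distributions.Normal` as the data source:

import theano
from sklearn.linear_model import LogisticRegression
from carl.distributions import Normal
from carl.learning import (ParameterizedClassifier,
                           make_parameterized_classification)

mu = theano.shared(0.5, name="mu")
p0 = Normal(mu=mu, sigma=1.0)
p1 = Normal(mu=0.0, sigma=1.0)

# X already carries the parameter column appended by the generator.
X, y = make_parameterized_classification(
    p0, p1, 10000, [(mu, [0.0, 0.5, 1.0])], random_state=0)

clf = ParameterizedClassifier(LogisticRegression(), params=[mu])
clf.fit(X, y)

# At prediction time, plain features are extended automatically with
# the current value of mu.
mu.set_value(0.5)
probas = clf.predict_proba(p0.rvs(10, random_state=1))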
Ancestors (in MRO)
- ParameterizedClassifier
- carl.learning.parameterize._ParameterizedEstimator
- sklearn.base.BaseEstimator
- sklearn.base.ClassifierMixin
- builtins.object
Static methods
def __init__(self, base_estimator, params)
Constructor.
Parameters
- `base_estimator` [`BaseEstimator`]: The estimator to parameterize.
- `params` [list of Theano shared variables]: The parameters.
def __init__(self, base_estimator, params):
    """Constructor.

    Parameters
    ----------
    * `base_estimator` [`BaseEstimator`]:
        The estimator to parameterize.

    * `params` [list of Theano shared variables]:
        The parameters.
    """
    self.base_estimator = base_estimator
    self.params = params
def fit(self, X, y)
Fit estimator on parameterized data.
Parameters
- `X` [array-like, shape=(n_samples, n_features+len(params))]: The samples, concatenated with the corresponding parameter values.
- `y` [array-like, shape=(n_samples,)]: The output values.

Returns
- `self` [object]: `self`.
def fit(self, X, y):
    """Fit estimator on parameterized data.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features+len(params))]:
        The samples, concatenated with the corresponding parameter values.

    * `y` [array-like, shape=(n_samples,)]:
        The output values.

    Returns
    -------
    * `self` [object]:
        `self`.
    """
    self.stacker_ = ParameterStacker(self.params)

    # XXX: this assumes that X is extended with parameters
    self.n_features_ = X.shape[1] - len(self.params)
    self.estimator_ = clone(self.base_estimator).fit(X, y)

    return self
def predict(self, X)
Predict the targets for `X`.
Parameter values are automatically appended from the current state of the parameters if those are not provided with `X`.
Parameters
- `X` [array-like, shape=(n_samples, n_features) or shape=(n_samples, n_features+len(params))]: The samples.

Returns
- `y` [array, shape=(n_samples,)]: The predicted output values.
def predict(self, X):
    """Predict the targets for `X`.

    Parameter values are automatically appended from the current state
    of the parameters if those are not provided with `X`.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features) or
           shape=(n_samples, n_features+len(params))]:
        The samples.

    Returns
    -------
    * `y` [array, shape=(n_samples,)]:
        The predicted output values.
    """
    return self.estimator_.predict(self._validate_X(X))
def predict_proba(self, X)
Predict the posterior probabilities of classification for `X`.
Parameter values are automatically appended from the current state of the parameters if those are not provided with `X`.
Parameters
- `X` [array-like, shape=(n_samples, n_features) or shape=(n_samples, n_features+len(params))]: The samples.

Returns
- `probas` [array, shape=(n_samples, n_classes)]: The predicted probabilities.
def predict_proba(self, X):
    """Predict the posterior probabilities of classification for `X`.

    Parameter values are automatically appended from the current state
    of the parameters if those are not provided with `X`.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features) or
           shape=(n_samples, n_features+len(params))]:
        The samples.

    Returns
    -------
    * `probas` [array, shape=(n_samples, n_classes)]:
        The predicted probabilities.
    """
    return self.estimator_.predict_proba(self._validate_X(X))
class ParameterizedRegressor
Parameterize a Scikit-Learn regressor.
This wrapper can be used to learn a parameterized regression problem, where parameter values are automatically added as additional features.
class ParameterizedRegressor(_ParameterizedEstimator, RegressorMixin):
    """Parameterize a Scikit-Learn regressor.

    This wrapper can be used to learn a parameterized regression problem,
    where parameter values are automatically added as additional features.
    """
    pass
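A synthetic sketch; the second column of `X` below plays the role of the stacked parameter value:

import numpy as np
import theano
from sklearn.linear_model import LinearRegression
from carl.learning import ParameterizedRegressor

theta = theano.shared(1.0, name="theta")

# Training data: one feature column plus one parameter column.
rng = np.random.RandomState(0)
X = np.hstack([rng.rand(100, 1),                        # feature
               rng.choice([0.5, 1.0], size=(100, 1))])  # theta values
y = X[:, 0] * X[:, 1]

reg = ParameterizedRegressor(LinearRegression(), params=[theta])
reg.fit(X, y)

# Plain features are extended with the current theta value at predict time.
theta.set_value(0.5)
y_pred = reg.predict(rng.rand(5, 1))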
Ancestors (in MRO)
- ParameterizedRegressor
- carl.learning.parameterize._ParameterizedEstimator
- sklearn.base.BaseEstimator
- sklearn.base.RegressorMixin
- builtins.object
Static methods
def __init__(self, base_estimator, params)
Constructor.
Parameters
- `base_estimator` [`BaseEstimator`]: The estimator to parameterize.
- `params` [list of Theano shared variables]: The parameters.
def __init__(self, base_estimator, params):
    """Constructor.

    Parameters
    ----------
    * `base_estimator` [`BaseEstimator`]:
        The estimator to parameterize.

    * `params` [list of Theano shared variables]:
        The parameters.
    """
    self.base_estimator = base_estimator
    self.params = params
def fit(self, X, y)
Fit estimator on parameterized data.
Parameters
- `X` [array-like, shape=(n_samples, n_features+len(params))]: The samples, concatenated with the corresponding parameter values.
- `y` [array-like, shape=(n_samples,)]: The output values.

Returns
- `self` [object]: `self`.
def fit(self, X, y):
    """Fit estimator on parameterized data.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features+len(params))]:
        The samples, concatenated with the corresponding parameter values.

    * `y` [array-like, shape=(n_samples,)]:
        The output values.

    Returns
    -------
    * `self` [object]:
        `self`.
    """
    self.stacker_ = ParameterStacker(self.params)

    # XXX: this assumes that X is extended with parameters
    self.n_features_ = X.shape[1] - len(self.params)
    self.estimator_ = clone(self.base_estimator).fit(X, y)

    return self
def predict(self, X)
Predict the targets for `X`.
Parameter values are automatically appended from the current state of the parameters if those are not provided with `X`.
Parameters
- `X` [array-like, shape=(n_samples, n_features) or shape=(n_samples, n_features+len(params))]: The samples.

Returns
- `y` [array, shape=(n_samples,)]: The predicted output values.
def predict(self, X):
    """Predict the targets for `X`.

    Parameter values are automatically appended from the current state
    of the parameters if those are not provided with `X`.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features) or
           shape=(n_samples, n_features+len(params))]:
        The samples.

    Returns
    -------
    * `y` [array, shape=(n_samples,)]:
        The predicted output values.
    """
    return self.estimator_.predict(self._validate_X(X))