carl.learning module
This module implements machine learning algorithms and utilities, complementary to Scikit-Learn.
""" This module implements machine learning algorithms and utilities, complementary to Scikit-Learn. """ # Carl is free software; you can redistribute it and/or modify it # under the terms of the Revised BSD License; see LICENSE file for # more details. from .base import as_classifier from .base import check_cv from .calibration import CalibratedClassifierCV from .parameterize import make_parameterized_classification from .parameterize import ParameterStacker from .parameterize import ParameterizedClassifier from .parameterize import ParameterizedRegressor __all__ = ("as_classifier", "check_cv", "CalibratedClassifierCV", "make_parameterized_classification", "ParameterStacker", "ParameterizedClassifier", "ParameterizedRegressor",)
Functions
def as_classifier(regressor)
Wrap a Scikit-Learn regressor into a binary classifier.
This function can be used to solve a binary classification problem as a
regression problem, where output labels {0,1} are treated as real values.
The wrapped regressor exhibits the classifier API, with the corresponding
predict, predict_proba and score methods.
Parameters
- regressor [RegressorMixin]: The regressor object.
Returns
- clf [ClassifierMixin]: The wrapped regressor, but with a classifier API.
def as_classifier(regressor):
    """Wrap a Scikit-Learn regressor into a binary classifier.

    This function can be used to solve a binary classification problem as a
    regression problem, where output labels {0,1} are treated as real values.
    The wrapped regressor exhibits the classifier API, with the corresponding
    `predict`, `predict_proba` and `score` methods.

    Parameters
    ----------
    * `regressor` [`RegressorMixin`]:
        The regressor object.

    Returns
    -------
    * `clf` [`ClassifierMixin`]:
        The wrapped regressor, but with a classifier API.
    """
    class Wrapper(BaseEstimator, ClassifierMixin):
        def __init__(self, base_estimator):
            self.base_estimator = base_estimator

        def fit(self, X, y, **kwargs):
            # Check inputs
            X, y = check_X_y(X, y)

            # Convert y (np.float is a deprecated alias; use np.float64)
            label_encoder = LabelEncoder()
            y = label_encoder.fit_transform(y).astype(np.float64)

            if len(label_encoder.classes_) != 2:
                raise ValueError("y must contain exactly two classes.")

            self.classes_ = label_encoder.classes_

            # Fit regressor
            self.regressor_ = clone(self.base_estimator).fit(X, y, **kwargs)

            return self

        def predict(self, X):
            return np.where(self.predict_proba(X)[:, 1] >= 0.5,
                            self.classes_[1],
                            self.classes_[0])

        def predict_proba(self, X):
            X = check_array(X)

            # Clip regression outputs into [0, 1] and use them as p(y=1)
            df = self.regressor_.predict(X)
            df = np.clip(df, 0., 1.)

            probas = np.zeros((len(X), 2))
            probas[:, 0] = 1. - df
            probas[:, 1] = df

            return probas

        def score(self, X, y):
            return self.regressor_.score(X, y)

    return Wrapper(regressor)
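For illustration (this example is not part of the carl sources; the regressor and toy data below are arbitrary), a regressor wrapped this way can be used anywhere a binary classifier is expected:

import numpy as np
from sklearn.tree import DecisionTreeRegressor
from carl.learning import as_classifier

rng = np.random.RandomState(0)
X = rng.rand(100, 2)
y = (X[:, 0] + X[:, 1] > 1.0).astype(int)  # binary labels {0, 1}

clf = as_classifier(DecisionTreeRegressor(max_depth=3))
clf.fit(X, y)
print(clf.predict_proba(X[:5]))  # clipped regressor outputs, as two columns
print(clf.predict(X[:5]))        # labels recovered through classes_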
def check_cv(cv=3, X=None, y=None, classifier=False)
Input checker utility for building a cross-validator.
Parameters
- cv [integer, cross-validation generator or an iterable, default=3]:
  Determines the cross-validation splitting strategy. Possible inputs
  for cv are:
  - An integer, to specify the number of folds.
  - An object to be used as a cross-validation generator.
  - An iterable yielding train/test splits.
  For integer/None inputs, if classifier is True and y is either binary
  or multiclass, StratifiedKFold is used. In all other cases, KFold is used.
- y [array-like, optional]: The target variable for supervised learning
  problems.
- classifier [boolean, default=False]: Whether the task is a classification
  task, in which case StratifiedKFold will be used.
Returns
- checked_cv [a cross-validator instance]: The return value is a
  cross-validator which generates the train/test splits via the split method.
Note
This method is backported from scikit-learn 0.18.
def check_cv(cv=3, X=None, y=None, classifier=False):
    """Input checker utility for building a cross-validator.

    Parameters
    ----------
    * `cv` [integer, cross-validation generator or an iterable, default=`3`]:
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:
          - integer, to specify the number of folds.
          - An object to be used as a cross-validation generator.
          - An iterable yielding train/test splits.

        For integer/None inputs, if classifier is True and `y` is either
        binary or multiclass, `StratifiedKFold` is used. In all other
        cases, `KFold` is used.

    * `y` [array-like, optional]:
        The target variable for supervised learning problems.

    * `classifier` [boolean, default=`False`]:
        Whether the task is a classification task, in which case
        `StratifiedKFold` will be used.

    Returns
    -------
    * `checked_cv` [a cross-validator instance]:
        The return value is a cross-validator which generates the
        train/test splits via the `split` method.

    Note
    ----
    This method is backported from scikit-learn 0.18.
    """
    return sklearn_check_cv(cv, y=y, classifier=classifier)
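For instance (an illustrative sketch with made-up labels), an integer cv is normalized into a concrete cross-validator object:

import numpy as np
from carl.learning import check_cv

y = np.array([0, 0, 0, 1, 1, 1])
cv = check_cv(3, y=y, classifier=True)  # binary y -> StratifiedKFold
for train, test in cv.split(np.zeros((6, 1)), y):
    print(train, test)  # three stratified train/test index splits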
def make_parameterized_classification(p0, p1, n_samples, params, random_state=None)
Generate parameterized classification data.
This function generates parameterized classification data, by enumerating
all possible combinations of provided parameter values and producing
samples in equal number from p0 and p1.
Parameters
- p0 [DistributionMixin]: The distribution from which to draw samples for
  class 0.
- p1 [DistributionMixin]: The distribution from which to draw samples for
  class 1.
- n_samples [integer]: The total number of samples to generate.
- params [list of pairs (theano shared variable, list of values) or list of
  theano shared variables]: The list of parameters and the corresponding
  values to generate samples for. If only a list of theano shared variables
  is given, then samples are generated using the current parameter values.
- random_state [integer or RandomState object]: The random seed.
Returns
- X [array, shape=(n_samples, n_features+len(params))]: The generated
  training data, as sample features and concatenated parameter values.
- y [array, shape=(n_samples,)]: The labels.
def make_parameterized_classification(p0, p1, n_samples, params,
                                      random_state=None):
    """Generate parameterized classification data.

    This function generates parameterized classification data, by
    enumerating all possible combinations of provided parameter values and
    producing samples in equal number from `p0` and `p1`.

    Parameters
    ----------
    * `p0` [`DistributionMixin`]:
        The distribution from which to draw samples for class 0.

    * `p1` [`DistributionMixin`]:
        The distribution from which to draw samples for class 1.

    * `n_samples` [integer]:
        The total number of samples to generate.

    * `params` [list of pairs (theano shared variable, list of values) or
                list of theano shared variables]:
        The list of parameters and the corresponding values to generate
        samples for. If only a list of theano shared variables is given,
        then samples are generated using the current parameter values.

    * `random_state` [integer or RandomState object]:
        The random seed.

    Returns
    -------
    * `X` [array, shape=(n_samples, n_features+len(params))]:
        The generated training data, as sample features and concatenated
        parameter values.

    * `y` [array, shape=(n_samples,)]:
        The labels.
    """
    rng = check_random_state(random_state)

    if not isinstance(params[0], tuple):
        # params is a plain list of shared variables: sample at the
        # current parameter values and stack them as features.
        X0 = p0.rvs(n_samples // 2, random_state=rng)
        X1 = p1.rvs(n_samples - (n_samples // 2), random_state=rng)
        X = ParameterStacker(params).transform(np.vstack((X0, X1)))
        y = np.zeros(n_samples)
        y[len(X0):] = 1

        return X, y

    else:
        # params is a list of (shared variable, values) pairs: enumerate
        # all combinations and recurse with each one set in place.
        combinations = list(product(*[values for _, values in params]))

        all_X = []
        all_y = []

        for c in combinations:
            for i, v in enumerate(c):
                params[i][0].set_value(v)

            X, y = make_parameterized_classification(
                p0, p1,
                n_samples // len(combinations),
                [p for p, _ in params],
                random_state=rng)

            all_X.append(X)
            all_y.append(y)

        X = np.vstack(all_X)
        y = np.concatenate(all_y)

        return X, y
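A sketch of typical usage, assuming carl.distributions.Normal and Theano are available (the parameter grid below is illustrative):

import theano
from carl.distributions import Normal
from carl.learning import make_parameterized_classification

mu = theano.shared(0.0, name="mu")
p0 = Normal(mu=0.0, sigma=1.0)
p1 = Normal(mu=mu, sigma=1.0)

X, y = make_parameterized_classification(
    p0, p1, n_samples=999,
    params=[(mu, [-1.0, 0.0, 1.0])],
    random_state=0)
print(X.shape)  # (999, 2): one feature column plus the mu column,
                # 999 // 3 samples per candidate value of mu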
Classes
class CalibratedClassifierCV
Probability calibration.
With this class, the base_estimator is fit on the train set of the
cross-validation generator and the test set is used for calibration. The
probabilities for each of the folds are then averaged for prediction.
class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
    """Probability calibration.

    With this class, the `base_estimator` is fit on the train set of the
    cross-validation generator and the test set is used for calibration.
    The probabilities for each of the folds are then averaged for
    prediction.
    """

    def __init__(self, base_estimator, method="histogram", bins="auto",
                 interpolation=None, variable_width=False, cv=1):
        """Constructor.

        Parameters
        ----------
        * `base_estimator` [`ClassifierMixin`]:
            The classifier whose output decision function needs to be
            calibrated to offer more accurate predict_proba outputs. If
            `cv="prefit"`, the classifier must have been fit already on
            data.

        * `method` [string]:
            The method to use for calibration. Supported methods include
            `"histogram"`, `"kde"`, `"isotonic"`, `"interpolated-isotonic"`
            and `"sigmoid"`.

        * `bins` [int, default="auto"]:
            The number of bins, if `method` is `"histogram"`.

        * `interpolation` [string, optional]:
            Specifies the kind of interpolation between bins as a string
            (`"linear"`, `"nearest"`, `"zero"`, `"slinear"`, `"quadratic"`,
            `"cubic"`), if `method` is `"histogram"`.

        * `variable_width` [boolean, optional]:
            If True, use equal-probability variable-width bins, if `method`
            is `"histogram"`.

        * `cv` [integer, cross-validation generator, iterable or `"prefit"`]:
            Determines the cross-validation splitting strategy.
            Possible inputs for cv are:
              - integer, to specify the number of folds.
              - An object to be used as a cross-validation generator.
              - An iterable yielding train/test splits.

            If `"prefit"` is passed, it is assumed that base_estimator has
            been fitted already and all data is used for calibration. If
            `cv=1`, the training data is used for both training and
            calibration.
        """
        self.base_estimator = base_estimator
        self.method = method
        self.bins = bins
        self.interpolation = interpolation
        self.variable_width = variable_width
        self.cv = cv

    def fit(self, X, y, sample_weight=None):
        """Fit the calibrated model.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            Training data.

        * `y` [array-like, shape=(n_samples,)]:
            Target values.

        Returns
        -------
        * `self` [object]:
            `self`.
        """
        # Check inputs
        X, y = check_X_y(X, y)

        # Convert y (np.float is a deprecated alias; use np.float64)
        label_encoder = LabelEncoder()
        y = label_encoder.fit_transform(y).astype(np.float64)

        if len(label_encoder.classes_) != 2:
            raise ValueError("y must contain exactly two classes.")

        self.classes_ = label_encoder.classes_

        # Calibrator
        if self.method == "histogram":
            base_calibrator = HistogramCalibrator(
                bins=self.bins, interpolation=self.interpolation,
                variable_width=self.variable_width)
        elif self.method == "kde":
            base_calibrator = KernelDensityCalibrator()
        elif self.method == "isotonic":
            base_calibrator = IsotonicCalibrator()
        elif self.method == "interpolated-isotonic":
            base_calibrator = IsotonicCalibrator(interpolation=True)
        elif self.method == "sigmoid":
            base_calibrator = SigmoidCalibrator()
        else:
            # Assume a calibrator instance was passed directly
            base_calibrator = self.method

        # Fit
        if self.cv == "prefit" or self.cv == 1:
            # Classifier
            if self.cv == 1:
                clf = clone(self.base_estimator)

                if isinstance(clf, RegressorMixin):
                    clf = as_classifier(clf)

                if sample_weight is None:
                    clf.fit(X, y)
                else:
                    clf.fit(X, y, sample_weight=sample_weight)
            else:
                clf = self.base_estimator

            self.classifiers_ = [clf]

            # Calibrator
            calibrator = clone(base_calibrator)
            T = clf.predict_proba(X)[:, 1]

            if sample_weight is None:
                calibrator.fit(T, y)
            else:
                calibrator.fit(T, y, sample_weight=sample_weight)

            self.calibrators_ = [calibrator]

        else:
            self.classifiers_ = []
            self.calibrators_ = []

            cv = check_cv(self.cv, X=X, y=y, classifier=True)

            for train, calibrate in cv.split(X, y):
                # Classifier
                clf = clone(self.base_estimator)

                if isinstance(clf, RegressorMixin):
                    clf = as_classifier(clf)

                if sample_weight is None:
                    clf.fit(X[train], y[train])
                else:
                    clf.fit(X[train], y[train],
                            sample_weight=sample_weight[train])

                self.classifiers_.append(clf)

                # Calibrator
                calibrator = clone(base_calibrator)
                T = clf.predict_proba(X[calibrate])[:, 1]

                if sample_weight is None:
                    calibrator.fit(T, y[calibrate])
                else:
                    calibrator.fit(T, y[calibrate],
                                   sample_weight=sample_weight[calibrate])

                self.calibrators_.append(calibrator)

        return self

    def predict(self, X):
        """Predict the targets for `X`.

        Can be different from the predictions of the uncalibrated
        classifier.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.

        Returns
        -------
        * `y` [array, shape=(n_samples,)]:
            The predicted class.
        """
        return np.where(self.predict_proba(X)[:, 1] >= 0.5,
                        self.classes_[1],
                        self.classes_[0])

    def predict_proba(self, X):
        """Predict the posterior probabilities of classification for `X`.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.

        Returns
        -------
        * `probas` [array, shape=(n_samples, n_classes)]:
            The predicted probabilities.
        """
        p = np.zeros((len(X), 2))

        # Average the calibrated fold probabilities
        for clf, calibrator in zip(self.classifiers_, self.calibrators_):
            p[:, 1] += calibrator.predict(clf.predict_proba(X)[:, 1])

        p[:, 1] /= len(self.classifiers_)
        p[:, 0] = 1. - p[:, 1]

        return p

    def _clone(self):
        # carl's clone() accepts an `original` keyword
        estimator = clone(self, original=True)
        if self.cv == "prefit":
            estimator.base_estimator = self.base_estimator
        return estimator
Ancestors (in MRO)
- CalibratedClassifierCV
- sklearn.base.BaseEstimator
- sklearn.base.ClassifierMixin
- builtins.object
Methods
def __init__(self, base_estimator, method='histogram', bins='auto', interpolation=None, variable_width=False, cv=1)
Constructor.
Parameters
- base_estimator [ClassifierMixin]: The classifier whose output decision
  function needs to be calibrated to offer more accurate predict_proba
  outputs. If cv="prefit", the classifier must have been fit already on data.
- method [string]: The method to use for calibration. Supported methods
  include "histogram", "kde", "isotonic", "interpolated-isotonic" and
  "sigmoid".
- bins [int, default="auto"]: The number of bins, if method is "histogram".
- interpolation [string, optional]: Specifies the kind of interpolation
  between bins as a string ("linear", "nearest", "zero", "slinear",
  "quadratic", "cubic"), if method is "histogram".
- variable_width [boolean, optional]: If True, use equal-probability
  variable-width bins, if method is "histogram".
- cv [integer, cross-validation generator, iterable or "prefit"]:
  Determines the cross-validation splitting strategy. Possible inputs
  for cv are:
  - An integer, to specify the number of folds.
  - An object to be used as a cross-validation generator.
  - An iterable yielding train/test splits.
  If "prefit" is passed, it is assumed that base_estimator has been fitted
  already and all data is used for calibration. If cv=1, the training data
  is used for both training and calibration.
def __init__(self, base_estimator, method="histogram", bins="auto",
             interpolation=None, variable_width=False, cv=1):
    """Constructor.

    Parameters
    ----------
    * `base_estimator` [`ClassifierMixin`]:
        The classifier whose output decision function needs to be
        calibrated to offer more accurate predict_proba outputs. If
        `cv="prefit"`, the classifier must have been fit already on data.

    * `method` [string]:
        The method to use for calibration. Supported methods include
        `"histogram"`, `"kde"`, `"isotonic"`, `"interpolated-isotonic"`
        and `"sigmoid"`.

    * `bins` [int, default="auto"]:
        The number of bins, if `method` is `"histogram"`.

    * `interpolation` [string, optional]:
        Specifies the kind of interpolation between bins as a string
        (`"linear"`, `"nearest"`, `"zero"`, `"slinear"`, `"quadratic"`,
        `"cubic"`), if `method` is `"histogram"`.

    * `variable_width` [boolean, optional]:
        If True, use equal-probability variable-width bins, if `method`
        is `"histogram"`.

    * `cv` [integer, cross-validation generator, iterable or `"prefit"`]:
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:
          - integer, to specify the number of folds.
          - An object to be used as a cross-validation generator.
          - An iterable yielding train/test splits.

        If `"prefit"` is passed, it is assumed that base_estimator has
        been fitted already and all data is used for calibration. If
        `cv=1`, the training data is used for both training and
        calibration.
    """
    self.base_estimator = base_estimator
    self.method = method
    self.bins = bins
    self.interpolation = interpolation
    self.variable_width = variable_width
    self.cv = cv
def fit(self, X, y, sample_weight=None)
Fit the calibrated model.
Parameters
- X [array-like, shape=(n_samples, n_features)]: Training data.
- y [array-like, shape=(n_samples,)]: Target values.
Returns
- self [object]: self.
def fit(self, X, y, sample_weight=None):
    """Fit the calibrated model.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        Training data.

    * `y` [array-like, shape=(n_samples,)]:
        Target values.

    Returns
    -------
    * `self` [object]:
        `self`.
    """
    # Check inputs
    X, y = check_X_y(X, y)

    # Convert y (np.float is a deprecated alias; use np.float64)
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(y).astype(np.float64)

    if len(label_encoder.classes_) != 2:
        raise ValueError("y must contain exactly two classes.")

    self.classes_ = label_encoder.classes_

    # Calibrator
    if self.method == "histogram":
        base_calibrator = HistogramCalibrator(
            bins=self.bins, interpolation=self.interpolation,
            variable_width=self.variable_width)
    elif self.method == "kde":
        base_calibrator = KernelDensityCalibrator()
    elif self.method == "isotonic":
        base_calibrator = IsotonicCalibrator()
    elif self.method == "interpolated-isotonic":
        base_calibrator = IsotonicCalibrator(interpolation=True)
    elif self.method == "sigmoid":
        base_calibrator = SigmoidCalibrator()
    else:
        # Assume a calibrator instance was passed directly
        base_calibrator = self.method

    # Fit
    if self.cv == "prefit" or self.cv == 1:
        # Classifier
        if self.cv == 1:
            clf = clone(self.base_estimator)

            if isinstance(clf, RegressorMixin):
                clf = as_classifier(clf)

            if sample_weight is None:
                clf.fit(X, y)
            else:
                clf.fit(X, y, sample_weight=sample_weight)
        else:
            clf = self.base_estimator

        self.classifiers_ = [clf]

        # Calibrator
        calibrator = clone(base_calibrator)
        T = clf.predict_proba(X)[:, 1]

        if sample_weight is None:
            calibrator.fit(T, y)
        else:
            calibrator.fit(T, y, sample_weight=sample_weight)

        self.calibrators_ = [calibrator]

    else:
        self.classifiers_ = []
        self.calibrators_ = []

        cv = check_cv(self.cv, X=X, y=y, classifier=True)

        for train, calibrate in cv.split(X, y):
            # Classifier
            clf = clone(self.base_estimator)

            if isinstance(clf, RegressorMixin):
                clf = as_classifier(clf)

            if sample_weight is None:
                clf.fit(X[train], y[train])
            else:
                clf.fit(X[train], y[train],
                        sample_weight=sample_weight[train])

            self.classifiers_.append(clf)

            # Calibrator
            calibrator = clone(base_calibrator)
            T = clf.predict_proba(X[calibrate])[:, 1]

            if sample_weight is None:
                calibrator.fit(T, y[calibrate])
            else:
                calibrator.fit(T, y[calibrate],
                               sample_weight=sample_weight[calibrate])

            self.calibrators_.append(calibrator)

    return self
def predict(self, X)
Predict the targets for X.
Can be different from the predictions of the uncalibrated classifier.
Parameters
- X [array-like, shape=(n_samples, n_features)]: The samples.
Returns
- y [array, shape=(n_samples,)]: The predicted class.
def predict(self, X):
    """Predict the targets for `X`.

    Can be different from the predictions of the uncalibrated classifier.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        The samples.

    Returns
    -------
    * `y` [array, shape=(n_samples,)]:
        The predicted class.
    """
    return np.where(self.predict_proba(X)[:, 1] >= 0.5,
                    self.classes_[1],
                    self.classes_[0])
def predict_proba(self, X)
Predict the posterior probabilities of classification for X.
Parameters
- X [array-like, shape=(n_samples, n_features)]: The samples.
Returns
- probas [array, shape=(n_samples, n_classes)]: The predicted probabilities.
def predict_proba(self, X):
    """Predict the posterior probabilities of classification for `X`.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        The samples.

    Returns
    -------
    * `probas` [array, shape=(n_samples, n_classes)]:
        The predicted probabilities.
    """
    p = np.zeros((len(X), 2))

    # Average the calibrated fold probabilities
    for clf, calibrator in zip(self.classifiers_, self.calibrators_):
        p[:, 1] += calibrator.predict(clf.predict_proba(X)[:, 1])

    p[:, 1] /= len(self.classifiers_)
    p[:, 0] = 1. - p[:, 1]

    return p
Instance variables
var base_estimator
var bins
var cv
var interpolation
var method
var variable_width
class ParameterStacker
Stack current parameter values as additional features.
class ParameterStacker(BaseEstimator, TransformerMixin):
    """Stack current parameter values as additional features."""

    def __init__(self, params):
        """Constructor.

        Parameters
        ----------
        * `params` [list of Theano shared variables]:
            The parameters.
        """
        self.params = params

    def transform(self, X, y=None):
        """Stack current parameter values as additional features.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.

        Returns
        -------
        * `Xt` [array, shape=(n_samples, n_features+len(params))]:
            The horizontal concatenation of X with the current parameter
            values, added as new columns.
        """
        Xp = np.empty((len(X), len(self.params)))

        for i, p in enumerate(self.params):
            Xp[:, i] = p.eval()

        return np.hstack((X, Xp))
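A small sketch (assuming Theano is available; the shared variable is illustrative):

import numpy as np
import theano
from carl.learning import ParameterStacker

mu = theano.shared(0.5, name="mu")
X = np.zeros((3, 2))

Xt = ParameterStacker([mu]).transform(X)
print(Xt.shape)   # (3, 3)
print(Xt[:, -1])  # [0.5, 0.5, 0.5], the current value of mu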
Ancestors (in MRO)
- ParameterStacker
- sklearn.base.BaseEstimator
- sklearn.base.TransformerMixin
- builtins.object
Methods
def __init__(self, params)
Constructor.
Parameters
- params [list of Theano shared variables]: The parameters.
def __init__(self, params):
    """Constructor.

    Parameters
    ----------
    * `params` [list of Theano shared variables]:
        The parameters.
    """
    self.params = params
def transform(self, X, y=None)
Stack current parameter values as additional features.
Parameters
- X [array-like, shape=(n_samples, n_features)]: The samples.
Returns
- Xt [array, shape=(n_samples, n_features+len(params))]: The horizontal
  concatenation of X with the current parameter values, added as new columns.
def transform(self, X, y=None):
    """Stack current parameter values as additional features.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        The samples.

    Returns
    -------
    * `Xt` [array, shape=(n_samples, n_features+len(params))]:
        The horizontal concatenation of X with the current parameter
        values, added as new columns.
    """
    Xp = np.empty((len(X), len(self.params)))

    for i, p in enumerate(self.params):
        Xp[:, i] = p.eval()

    return np.hstack((X, Xp))
Instance variables
var params
class ParameterizedClassifier
Parameterize a Scikit-Learn classifier.
This wrapper can be used to learn a parameterized classification problem, where parameter values are automatically added as additional features.
class ParameterizedClassifier(_ParameterizedEstimator, ClassifierMixin):
    """Parameterize a Scikit-Learn classifier.

    This wrapper can be used to learn a parameterized classification
    problem, where parameter values are automatically added as additional
    features.
    """

    def predict_proba(self, X):
        """Predict the posterior probabilities of classification for X.

        Parameter values are automatically appended from the current state
        of the parameters if those are not provided with X.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features) or
               shape=(n_samples, n_features+len(params))]:
            The samples.

        Returns
        -------
        * `probas` [array, shape=(n_samples, n_classes)]:
            The predicted probabilities.
        """
        return self.estimator_.predict_proba(self._validate_X(X))
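An illustrative end-to-end sketch built on the helpers above (the base estimator and parameter values are assumptions, not from the original docs):

import theano
from sklearn.linear_model import LogisticRegression
from carl.distributions import Normal
from carl.learning import (ParameterizedClassifier,
                           make_parameterized_classification)

mu = theano.shared(0.0, name="mu")
X, y = make_parameterized_classification(
    Normal(mu=0.0, sigma=1.0), Normal(mu=mu, sigma=1.0),
    n_samples=1000, params=[(mu, [-1.0, 1.0])], random_state=0)

clf = ParameterizedClassifier(LogisticRegression(), [mu])
clf.fit(X, y)  # X already carries the mu column

mu.set_value(1.0)  # evaluate at mu=1; the parameter column is appended
print(clf.predict_proba(X[:5, :-1]))  # X passed without the mu column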
Ancestors (in MRO)
- ParameterizedClassifier
- carl.learning.parameterize._ParameterizedEstimator
- sklearn.base.BaseEstimator
- sklearn.base.ClassifierMixin
- builtins.object
Methods
def __init__(self, base_estimator, params)
Constructor.
Parameters
- base_estimator [BaseEstimator]: The estimator to parameterize.
- params [list of Theano shared variables]: The parameters.
def __init__(self, base_estimator, params):
    """Constructor.

    Parameters
    ----------
    * `base_estimator` [`BaseEstimator`]:
        The estimator to parameterize.

    * `params` [list of Theano shared variables]:
        The parameters.
    """
    self.base_estimator = base_estimator
    self.params = params
def fit(self, X, y)
Fit estimator on parameterized data.
Parameters
- X [array-like, shape=(n_samples, n_features+len(params))]: The samples,
  concatenated with the corresponding parameter values.
- y [array-like, shape=(n_samples,)]: The output values.
Returns
- self [object]: self.
def fit(self, X, y):
    """Fit estimator on parameterized data.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features+len(params))]:
        The samples, concatenated with the corresponding parameter values.

    * `y` [array-like, shape=(n_samples,)]:
        The output values.

    Returns
    -------
    * `self` [object]:
        `self`.
    """
    self.stacker_ = ParameterStacker(self.params)

    # XXX: this assumes that X is extended with parameters
    self.n_features_ = X.shape[1] - len(self.params)
    self.estimator_ = clone(self.base_estimator).fit(X, y)

    return self
def predict(self, X)
Predict the targets for X.
Parameter values are automatically appended from the current state
of the parameters if those are not provided with X.
Parameters
- X [array-like, shape=(n_samples, n_features) or shape=(n_samples,
  n_features+len(params))]: The samples.
Returns
- y [array, shape=(n_samples,)]: The predicted output values.
def predict(self, X):
    """Predict the targets for `X`.

    Parameter values are automatically appended from the current state
    of the parameters if those are not provided with `X`.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features) or
           shape=(n_samples, n_features+len(params))]:
        The samples.

    Returns
    -------
    * `y` [array, shape=(n_samples,)]:
        The predicted output values.
    """
    return self.estimator_.predict(self._validate_X(X))
def predict_proba(self, X)
Predict the posterior probabilities of classification for X.
Parameter values are automatically appended from the current state of the parameters if those are not provided with X.
Parameters
- X [array-like, shape=(n_samples, n_features) or shape=(n_samples,
  n_features+len(params))]: The samples.
Returns
- probas [array, shape=(n_samples, n_classes)]: The predicted probabilities.
def predict_proba(self, X):
    """Predict the posterior probabilities of classification for X.

    Parameter values are automatically appended from the current state
    of the parameters if those are not provided with X.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features) or
           shape=(n_samples, n_features+len(params))]:
        The samples.

    Returns
    -------
    * `probas` [array, shape=(n_samples, n_classes)]:
        The predicted probabilities.
    """
    return self.estimator_.predict_proba(self._validate_X(X))
class ParameterizedRegressor
Parameterize a Scikit-Learn regressor.
This wrapper can be used to learn a parameterized regression problem, where parameter values are automatically added as additional features.
class ParameterizedRegressor(_ParameterizedEstimator, RegressorMixin):
    """Parameterize a Scikit-Learn regressor.

    This wrapper can be used to learn a parameterized regression problem,
    where parameter values are automatically added as additional features.
    """
    pass
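A sketch with made-up data: the last column of the training matrix carries the parameter value, and at prediction time the current value of the shared variable is appended automatically.

import numpy as np
import theano
from sklearn.linear_model import LinearRegression
from carl.learning import ParameterizedRegressor

theta = theano.shared(2.0, name="theta")
rng = np.random.RandomState(0)
x = rng.rand(200, 1)
X = np.hstack((x, np.full((200, 1), theta.get_value())))  # features + theta
y = theta.get_value() * x.ravel() + rng.normal(scale=0.1, size=200)

reg = ParameterizedRegressor(LinearRegression(), [theta])
reg.fit(X, y)
print(reg.predict(x[:3]))  # theta appended from its current value (2.0)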
Ancestors (in MRO)
- ParameterizedRegressor
- carl.learning.parameterize._ParameterizedEstimator
- sklearn.base.BaseEstimator
- sklearn.base.RegressorMixin
- builtins.object
Methods
def __init__(self, base_estimator, params)
Constructor.
Parameters
- base_estimator [BaseEstimator]: The estimator to parameterize.
- params [list of Theano shared variables]: The parameters.
def __init__(self, base_estimator, params):
    """Constructor.

    Parameters
    ----------
    * `base_estimator` [`BaseEstimator`]:
        The estimator to parameterize.

    * `params` [list of Theano shared variables]:
        The parameters.
    """
    self.base_estimator = base_estimator
    self.params = params
def fit(self, X, y)
Fit estimator on parameterized data.
Parameters
- X [array-like, shape=(n_samples, n_features+len(params))]: The samples,
  concatenated with the corresponding parameter values.
- y [array-like, shape=(n_samples,)]: The output values.
Returns
- self [object]: self.
def fit(self, X, y):
    """Fit estimator on parameterized data.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features+len(params))]:
        The samples, concatenated with the corresponding parameter values.

    * `y` [array-like, shape=(n_samples,)]:
        The output values.

    Returns
    -------
    * `self` [object]:
        `self`.
    """
    self.stacker_ = ParameterStacker(self.params)

    # XXX: this assumes that X is extended with parameters
    self.n_features_ = X.shape[1] - len(self.params)
    self.estimator_ = clone(self.base_estimator).fit(X, y)

    return self
def predict(self, X)
Predict the targets for X.
Parameter values are automatically appended from the current state
of the parameters if those are not provided with X.
Parameters
- X [array-like, shape=(n_samples, n_features) or shape=(n_samples,
  n_features+len(params))]: The samples.
Returns
- y [array, shape=(n_samples,)]: The predicted output values.
def predict(self, X):
    """Predict the targets for `X`.

    Parameter values are automatically appended from the current state
    of the parameters if those are not provided with `X`.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features) or
           shape=(n_samples, n_features+len(params))]:
        The samples.

    Returns
    -------
    * `y` [array, shape=(n_samples,)]:
        The predicted output values.
    """
    return self.estimator_.predict(self._validate_X(X))