carl.learning module
This module implements machine learning algorithms and utilities, complementary to Scikit-Learn.
""" This module implements machine learning algorithms and utilities, complementary to Scikit-Learn. """ # Carl is free software; you can redistribute it and/or modify it # under the terms of the Revised BSD License; see LICENSE file for # more details. from .base import as_classifier from .base import check_cv from .calibration import CalibratedClassifierCV from .parameterize import make_parameterized_classification from .parameterize import ParameterStacker from .parameterize import ParameterizedClassifier from .parameterize import ParameterizedRegressor __all__ = ("as_classifier", "check_cv", "CalibratedClassifierCV", "make_parameterized_classification", "ParameterStacker", "ParameterizedClassifier", "ParameterizedRegressor",)
Functions
def as_classifier(regressor)
Wrap a Scikit-Learn regressor into a binary classifier.
This function can be used to solve a binary classification problem as a
regression problem, where output labels {0,1} are treated as real values.
The wrapped regressor exhibits the classifier API, with the corresponding
predict, predict_proba and score methods.
Parameters
- regressor [RegressorMixin]: The regressor object.
Returns
- clf [ClassifierMixin]: The wrapped regressor, but with a classifier API.
def as_classifier(regressor):
    """Wrap a Scikit-Learn regressor into a binary classifier.

    This function can be used to solve a binary classification problem as a
    regression problem, where output labels {0,1} are treated as real values.
    The wrapped regressor exhibits the classifier API, with the corresponding
    `predict`, `predict_proba` and `score` methods.

    Parameters
    ----------
    * `regressor` [`RegressorMixin`]:
        The regressor object.

    Returns
    -------
    * `clf` [`ClassifierMixin`]:
        The wrapped regressor, but with a classifier API.
    """
    class Wrapper(BaseEstimator, ClassifierMixin):
        def __init__(self, base_estimator):
            self.base_estimator = base_estimator

        def fit(self, X, y, **kwargs):
            # Check inputs
            X, y = check_X_y(X, y)

            # Convert y (np.float is a deprecated alias; use np.float64)
            label_encoder = LabelEncoder()
            y = label_encoder.fit_transform(y).astype(np.float64)

            if len(label_encoder.classes_) != 2:
                raise ValueError("y must contain exactly two classes.")

            self.classes_ = label_encoder.classes_

            # Fit regressor
            self.regressor_ = clone(self.base_estimator).fit(X, y, **kwargs)

            return self

        def predict(self, X):
            return np.where(self.predict_proba(X)[:, 1] >= 0.5,
                            self.classes_[1],
                            self.classes_[0])

        def predict_proba(self, X):
            X = check_array(X)

            # Clip regression outputs into [0, 1] and use them as p(y=1)
            df = self.regressor_.predict(X)
            df = np.clip(df, 0., 1.)

            probas = np.zeros((len(X), 2))
            probas[:, 0] = 1. - df
            probas[:, 1] = df

            return probas

        def score(self, X, y):
            return self.regressor_.score(X, y)

    return Wrapper(regressor)
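For illustration (this example is not part of the carl sources; the regressor and toy data below are arbitrary), a regressor wrapped this way can be used anywhere a binary classifier is expected:

import numpy as np
from sklearn.tree import DecisionTreeRegressor
from carl.learning import as_classifier

rng = np.random.RandomState(0)
X = rng.rand(100, 2)
y = (X[:, 0] + X[:, 1] > 1.0).astype(int)  # binary labels {0, 1}

clf = as_classifier(DecisionTreeRegressor(max_depth=3))
clf.fit(X, y)
print(clf.predict_proba(X[:5]))  # clipped regressor outputs, as two columns
print(clf.predict(X[:5]))        # labels recovered through classes_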
def check_cv(cv=3, X=None, y=None, classifier=False)
Input checker utility for building a cross-validator.
Parameters
- cv [integer, cross-validation generator or an iterable, default=3]:
  Determines the cross-validation splitting strategy. Possible inputs
  for cv are:
  - An integer, to specify the number of folds.
  - An object to be used as a cross-validation generator.
  - An iterable yielding train/test splits.
  For integer/None inputs, if classifier is True and y is either binary
  or multiclass, StratifiedKFold is used. In all other cases, KFold is used.
- y [array-like, optional]: The target variable for supervised learning
  problems.
- classifier [boolean, default=False]: Whether the task is a classification
  task, in which case StratifiedKFold will be used.
Returns
- checked_cv [a cross-validator instance]: The return value is a
  cross-validator which generates the train/test splits via the split method.
Note
This method is backported from scikit-learn 0.18.
def check_cv(cv=3, X=None, y=None, classifier=False):
    """Input checker utility for building a cross-validator.

    Parameters
    ----------
    * `cv` [integer, cross-validation generator or an iterable, default=`3`]:
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:
          - integer, to specify the number of folds.
          - An object to be used as a cross-validation generator.
          - An iterable yielding train/test splits.

        For integer/None inputs, if classifier is True and `y` is either
        binary or multiclass, `StratifiedKFold` is used. In all other
        cases, `KFold` is used.

    * `y` [array-like, optional]:
        The target variable for supervised learning problems.

    * `classifier` [boolean, default=`False`]:
        Whether the task is a classification task, in which case
        `StratifiedKFold` will be used.

    Returns
    -------
    * `checked_cv` [a cross-validator instance]:
        The return value is a cross-validator which generates the
        train/test splits via the `split` method.

    Note
    ----
    This method is backported from scikit-learn 0.18.
    """
    return sklearn_check_cv(cv, y=y, classifier=classifier)
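For instance (an illustrative sketch with made-up labels), an integer cv is normalized into a concrete cross-validator object:

import numpy as np
from carl.learning import check_cv

y = np.array([0, 0, 0, 1, 1, 1])
cv = check_cv(3, y=y, classifier=True)  # binary y -> StratifiedKFold
for train, test in cv.split(np.zeros((6, 1)), y):
    print(train, test)  # three stratified train/test index splits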
def make_parameterized_classification(p0, p1, n_samples, params, random_state=None)
Generate parameterized classification data.
This function generates parameterized classification data, by enumerating
all possible combinations of provided parameter values and producing
samples in equal number from p0 and p1.
Parameters
- p0 [DistributionMixin]: The distribution from which to draw samples for
  class 0.
- p1 [DistributionMixin]: The distribution from which to draw samples for
  class 1.
- n_samples [integer]: The total number of samples to generate.
- params [list of pairs (theano shared variable, list of values) or list of
  theano shared variables]: The list of parameters and the corresponding
  values to generate samples for. If only a list of theano shared variables
  is given, then samples are generated using the current parameter values.
- random_state [integer or RandomState object]: The random seed.
Returns
- X [array, shape=(n_samples, n_features+len(params))]: The generated
  training data, as sample features and concatenated parameter values.
- y [array, shape=(n_samples,)]: The labels.
def make_parameterized_classification(p0, p1, n_samples, params,
                                      random_state=None):
    """Generate parameterized classification data.

    This function generates parameterized classification data, by
    enumerating all possible combinations of provided parameter values and
    producing samples in equal number from `p0` and `p1`.

    Parameters
    ----------
    * `p0` [`DistributionMixin`]:
        The distribution from which to draw samples for class 0.

    * `p1` [`DistributionMixin`]:
        The distribution from which to draw samples for class 1.

    * `n_samples` [integer]:
        The total number of samples to generate.

    * `params` [list of pairs (theano shared variable, list of values) or
                list of theano shared variables]:
        The list of parameters and the corresponding values to generate
        samples for. If only a list of theano shared variables is given,
        then samples are generated using the current parameter values.

    * `random_state` [integer or RandomState object]:
        The random seed.

    Returns
    -------
    * `X` [array, shape=(n_samples, n_features+len(params))]:
        The generated training data, as sample features and concatenated
        parameter values.

    * `y` [array, shape=(n_samples,)]:
        The labels.
    """
    rng = check_random_state(random_state)

    if not isinstance(params[0], tuple):
        # params is a plain list of shared variables: sample at the
        # current parameter values and stack them as features.
        X0 = p0.rvs(n_samples // 2, random_state=rng)
        X1 = p1.rvs(n_samples - (n_samples // 2), random_state=rng)
        X = ParameterStacker(params).transform(np.vstack((X0, X1)))
        y = np.zeros(n_samples)
        y[len(X0):] = 1

        return X, y

    else:
        # params is a list of (shared variable, values) pairs: enumerate
        # all combinations and recurse with each one set in place.
        combinations = list(product(*[values for _, values in params]))

        all_X = []
        all_y = []

        for c in combinations:
            for i, v in enumerate(c):
                params[i][0].set_value(v)

            X, y = make_parameterized_classification(
                p0, p1,
                n_samples // len(combinations),
                [p for p, _ in params],
                random_state=rng)

            all_X.append(X)
            all_y.append(y)

        X = np.vstack(all_X)
        y = np.concatenate(all_y)

        return X, y
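A sketch of typical usage, assuming carl.distributions.Normal and Theano are available (the parameter grid below is illustrative):

import theano
from carl.distributions import Normal
from carl.learning import make_parameterized_classification

mu = theano.shared(0.0, name="mu")
p0 = Normal(mu=0.0, sigma=1.0)
p1 = Normal(mu=mu, sigma=1.0)

X, y = make_parameterized_classification(
    p0, p1, n_samples=999,
    params=[(mu, [-1.0, 0.0, 1.0])],
    random_state=0)
print(X.shape)  # (999, 2): one feature column plus the mu column,
                # 999 // 3 samples per candidate value of mu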
Classes
class CalibratedClassifierCV
Probability calibration.
With this class, the base_estimator is fit on the train set of the
cross-validation generator and the test set is used for calibration. The
probabilities for each of the folds are then averaged for prediction.
class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
    """Probability calibration.

    With this class, the `base_estimator` is fit on the train set of the
    cross-validation generator and the test set is used for calibration.
    The probabilities for each of the folds are then averaged for
    prediction.
    """

    def __init__(self, base_estimator, method="histogram", bins="auto",
                 interpolation=None, variable_width=False, cv=1):
        """Constructor.

        Parameters
        ----------
        * `base_estimator` [`ClassifierMixin`]:
            The classifier whose output decision function needs to be
            calibrated to offer more accurate predict_proba outputs. If
            `cv="prefit"`, the classifier must have been fit already on
            data.

        * `method` [string]:
            The method to use for calibration. Supported methods include
            `"histogram"`, `"kde"`, `"isotonic"`, `"interpolated-isotonic"`
            and `"sigmoid"`.

        * `bins` [int, default="auto"]:
            The number of bins, if `method` is `"histogram"`.

        * `interpolation` [string, optional]:
            Specifies the kind of interpolation between bins as a string
            (`"linear"`, `"nearest"`, `"zero"`, `"slinear"`, `"quadratic"`,
            `"cubic"`), if `method` is `"histogram"`.

        * `variable_width` [boolean, optional]:
            If True, use equal-probability variable-width bins, if `method`
            is `"histogram"`.

        * `cv` [integer, cross-validation generator, iterable or `"prefit"`]:
            Determines the cross-validation splitting strategy.
            Possible inputs for cv are:
              - integer, to specify the number of folds.
              - An object to be used as a cross-validation generator.
              - An iterable yielding train/test splits.

            If `"prefit"` is passed, it is assumed that base_estimator has
            been fitted already and all data is used for calibration. If
            `cv=1`, the training data is used for both training and
            calibration.
        """
        self.base_estimator = base_estimator
        self.method = method
        self.bins = bins
        self.interpolation = interpolation
        self.variable_width = variable_width
        self.cv = cv

    def fit(self, X, y, sample_weight=None):
        """Fit the calibrated model.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            Training data.

        * `y` [array-like, shape=(n_samples,)]:
            Target values.

        Returns
        -------
        * `self` [object]:
            `self`.
        """
        # Check inputs
        X, y = check_X_y(X, y)

        # Convert y (np.float is a deprecated alias; use np.float64)
        label_encoder = LabelEncoder()
        y = label_encoder.fit_transform(y).astype(np.float64)

        if len(label_encoder.classes_) != 2:
            raise ValueError("y must contain exactly two classes.")

        self.classes_ = label_encoder.classes_

        # Calibrator
        if self.method == "histogram":
            base_calibrator = HistogramCalibrator(
                bins=self.bins, interpolation=self.interpolation,
                variable_width=self.variable_width)
        elif self.method == "kde":
            base_calibrator = KernelDensityCalibrator()
        elif self.method == "isotonic":
            base_calibrator = IsotonicCalibrator()
        elif self.method == "interpolated-isotonic":
            base_calibrator = IsotonicCalibrator(interpolation=True)
        elif self.method == "sigmoid":
            base_calibrator = SigmoidCalibrator()
        else:
            # Assume a calibrator instance was passed directly
            base_calibrator = self.method

        # Fit
        if self.cv == "prefit" or self.cv == 1:
            # Classifier
            if self.cv == 1:
                clf = clone(self.base_estimator)

                if isinstance(clf, RegressorMixin):
                    clf = as_classifier(clf)

                if sample_weight is None:
                    clf.fit(X, y)
                else:
                    clf.fit(X, y, sample_weight=sample_weight)
            else:
                clf = self.base_estimator

            self.classifiers_ = [clf]

            # Calibrator
            calibrator = clone(base_calibrator)
            T = clf.predict_proba(X)[:, 1]

            if sample_weight is None:
                calibrator.fit(T, y)
            else:
                calibrator.fit(T, y, sample_weight=sample_weight)

            self.calibrators_ = [calibrator]

        else:
            self.classifiers_ = []
            self.calibrators_ = []

            cv = check_cv(self.cv, X=X, y=y, classifier=True)

            for train, calibrate in cv.split(X, y):
                # Classifier
                clf = clone(self.base_estimator)

                if isinstance(clf, RegressorMixin):
                    clf = as_classifier(clf)

                if sample_weight is None:
                    clf.fit(X[train], y[train])
                else:
                    clf.fit(X[train], y[train],
                            sample_weight=sample_weight[train])

                self.classifiers_.append(clf)

                # Calibrator
                calibrator = clone(base_calibrator)
                T = clf.predict_proba(X[calibrate])[:, 1]

                if sample_weight is None:
                    calibrator.fit(T, y[calibrate])
                else:
                    calibrator.fit(T, y[calibrate],
                                   sample_weight=sample_weight[calibrate])

                self.calibrators_.append(calibrator)

        return self

    def predict(self, X):
        """Predict the targets for `X`.

        Can be different from the predictions of the uncalibrated
        classifier.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.

        Returns
        -------
        * `y` [array, shape=(n_samples,)]:
            The predicted class.
        """
        return np.where(self.predict_proba(X)[:, 1] >= 0.5,
                        self.classes_[1],
                        self.classes_[0])

    def predict_proba(self, X):
        """Predict the posterior probabilities of classification for `X`.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.

        Returns
        -------
        * `probas` [array, shape=(n_samples, n_classes)]:
            The predicted probabilities.
        """
        p = np.zeros((len(X), 2))

        # Average the calibrated fold probabilities
        for clf, calibrator in zip(self.classifiers_, self.calibrators_):
            p[:, 1] += calibrator.predict(clf.predict_proba(X)[:, 1])

        p[:, 1] /= len(self.classifiers_)
        p[:, 0] = 1. - p[:, 1]

        return p

    def _clone(self):
        # carl's clone() accepts an `original` keyword
        estimator = clone(self, original=True)
        if self.cv == "prefit":
            estimator.base_estimator = self.base_estimator
        return estimator
Ancestors (in MRO)
- CalibratedClassifierCV
- sklearn.base.BaseEstimator
- sklearn.base.ClassifierMixin
- builtins.object
Methods
def __init__(self, base_estimator, method='histogram', bins='auto', interpolation=None, variable_width=False, cv=1)
Constructor.
Parameters
- base_estimator [ClassifierMixin]: The classifier whose output decision
  function needs to be calibrated to offer more accurate predict_proba
  outputs. If cv="prefit", the classifier must have been fit already on data.
- method [string]: The method to use for calibration. Supported methods
  include "histogram", "kde", "isotonic", "interpolated-isotonic" and
  "sigmoid".
- bins [int, default="auto"]: The number of bins, if method is "histogram".
- interpolation [string, optional]: Specifies the kind of interpolation
  between bins as a string ("linear", "nearest", "zero", "slinear",
  "quadratic", "cubic"), if method is "histogram".
- variable_width [boolean, optional]: If True, use equal-probability
  variable-width bins, if method is "histogram".
- cv [integer, cross-validation generator, iterable or "prefit"]:
  Determines the cross-validation splitting strategy. Possible inputs
  for cv are:
  - An integer, to specify the number of folds.
  - An object to be used as a cross-validation generator.
  - An iterable yielding train/test splits.
  If "prefit" is passed, it is assumed that base_estimator has been fitted
  already and all data is used for calibration. If cv=1, the training data
  is used for both training and calibration.
def __init__(self, base_estimator, method="histogram", bins="auto",
             interpolation=None, variable_width=False, cv=1):
    """Constructor.

    Parameters
    ----------
    * `base_estimator` [`ClassifierMixin`]:
        The classifier whose output decision function needs to be
        calibrated to offer more accurate predict_proba outputs. If
        `cv="prefit"`, the classifier must have been fit already on data.

    * `method` [string]:
        The method to use for calibration. Supported methods include
        `"histogram"`, `"kde"`, `"isotonic"`, `"interpolated-isotonic"`
        and `"sigmoid"`.

    * `bins` [int, default="auto"]:
        The number of bins, if `method` is `"histogram"`.

    * `interpolation` [string, optional]:
        Specifies the kind of interpolation between bins as a string
        (`"linear"`, `"nearest"`, `"zero"`, `"slinear"`, `"quadratic"`,
        `"cubic"`), if `method` is `"histogram"`.

    * `variable_width` [boolean, optional]:
        If True, use equal-probability variable-width bins, if `method`
        is `"histogram"`.

    * `cv` [integer, cross-validation generator, iterable or `"prefit"`]:
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:
          - integer, to specify the number of folds.
          - An object to be used as a cross-validation generator.
          - An iterable yielding train/test splits.

        If `"prefit"` is passed, it is assumed that base_estimator has
        been fitted already and all data is used for calibration. If
        `cv=1`, the training data is used for both training and
        calibration.
    """
    self.base_estimator = base_estimator
    self.method = method
    self.bins = bins
    self.interpolation = interpolation
    self.variable_width = variable_width
    self.cv = cv
def fit(self, X, y, sample_weight=None)
Fit the calibrated model.
Parameters
- X [array-like, shape=(n_samples, n_features)]: Training data.
- y [array-like, shape=(n_samples,)]: Target values.
Returns
- self [object]: self.
def fit(self, X, y, sample_weight=None):
    """Fit the calibrated model.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        Training data.

    * `y` [array-like, shape=(n_samples,)]:
        Target values.

    Returns
    -------
    * `self` [object]:
        `self`.
    """
    # Check inputs
    X, y = check_X_y(X, y)

    # Convert y (np.float is a deprecated alias; use np.float64)
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(y).astype(np.float64)

    if len(label_encoder.classes_) != 2:
        raise ValueError("y must contain exactly two classes.")

    self.classes_ = label_encoder.classes_

    # Calibrator
    if self.method == "histogram":
        base_calibrator = HistogramCalibrator(
            bins=self.bins, interpolation=self.interpolation,
            variable_width=self.variable_width)
    elif self.method == "kde":
        base_calibrator = KernelDensityCalibrator()
    elif self.method == "isotonic":
        base_calibrator = IsotonicCalibrator()
    elif self.method == "interpolated-isotonic":
        base_calibrator = IsotonicCalibrator(interpolation=True)
    elif self.method == "sigmoid":
        base_calibrator = SigmoidCalibrator()
    else:
        # Assume a calibrator instance was passed directly
        base_calibrator = self.method

    # Fit
    if self.cv == "prefit" or self.cv == 1:
        # Classifier
        if self.cv == 1:
            clf = clone(self.base_estimator)

            if isinstance(clf, RegressorMixin):
                clf = as_classifier(clf)

            if sample_weight is None:
                clf.fit(X, y)
            else:
                clf.fit(X, y, sample_weight=sample_weight)
        else:
            clf = self.base_estimator

        self.classifiers_ = [clf]

        # Calibrator
        calibrator = clone(base_calibrator)
        T = clf.predict_proba(X)[:, 1]

        if sample_weight is None:
            calibrator.fit(T, y)
        else:
            calibrator.fit(T, y, sample_weight=sample_weight)

        self.calibrators_ = [calibrator]

    else:
        self.classifiers_ = []
        self.calibrators_ = []

        cv = check_cv(self.cv, X=X, y=y, classifier=True)

        for train, calibrate in cv.split(X, y):
            # Classifier
            clf = clone(self.base_estimator)

            if isinstance(clf, RegressorMixin):
                clf = as_classifier(clf)

            if sample_weight is None:
                clf.fit(X[train], y[train])
            else:
                clf.fit(X[train], y[train],
                        sample_weight=sample_weight[train])

            self.classifiers_.append(clf)

            # Calibrator
            calibrator = clone(base_calibrator)
            T = clf.predict_proba(X[calibrate])[:, 1]

            if sample_weight is None:
                calibrator.fit(T, y[calibrate])
            else:
                calibrator.fit(T, y[calibrate],
                               sample_weight=sample_weight[calibrate])

            self.calibrators_.append(calibrator)

    return self
def predict(self, X)
Predict the targets for X.
Can be different from the predictions of the uncalibrated classifier.
Parameters
- X [array-like, shape=(n_samples, n_features)]: The samples.
Returns
- y [array, shape=(n_samples,)]: The predicted class.
def predict(self, X):
    """Predict the targets for `X`.

    Can be different from the predictions of the uncalibrated classifier.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        The samples.

    Returns
    -------
    * `y` [array, shape=(n_samples,)]:
        The predicted class.
    """
    return np.where(self.predict_proba(X)[:, 1] >= 0.5,
                    self.classes_[1],
                    self.classes_[0])
def predict_proba(self, X)
Predict the posterior probabilities of classification for X.
Parameters
- X [array-like, shape=(n_samples, n_features)]: The samples.
Returns
- probas [array, shape=(n_samples, n_classes)]: The predicted probabilities.
def predict_proba(self, X):
    """Predict the posterior probabilities of classification for `X`.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        The samples.

    Returns
    -------
    * `probas` [array, shape=(n_samples, n_classes)]:
        The predicted probabilities.
    """
    p = np.zeros((len(X), 2))

    # Average the calibrated fold probabilities
    for clf, calibrator in zip(self.classifiers_, self.calibrators_):
        p[:, 1] += calibrator.predict(clf.predict_proba(X)[:, 1])

    p[:, 1] /= len(self.classifiers_)
    p[:, 0] = 1. - p[:, 1]

    return p
Instance variables
var base_estimator
var bins
var cv
var interpolation
var method
var variable_width
class ParameterStacker
Stack current parameter values as additional features.
class ParameterStacker(BaseEstimator, TransformerMixin):
    """Stack current parameter values as additional features."""

    def __init__(self, params):
        """Constructor.

        Parameters
        ----------
        * `params` [list of Theano shared variables]:
            The parameters.
        """
        self.params = params

    def transform(self, X, y=None):
        """Stack current parameter values as additional features.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.

        Returns
        -------
        * `Xt` [array, shape=(n_samples, n_features+len(params))]:
            The horizontal concatenation of X with the current parameter
            values, added as new columns.
        """
        Xp = np.empty((len(X), len(self.params)))

        for i, p in enumerate(self.params):
            Xp[:, i] = p.eval()

        return np.hstack((X, Xp))
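A small sketch (assuming Theano is available; the shared variable is illustrative):

import numpy as np
import theano
from carl.learning import ParameterStacker

mu = theano.shared(0.5, name="mu")
X = np.zeros((3, 2))

Xt = ParameterStacker([mu]).transform(X)
print(Xt.shape)   # (3, 3)
print(Xt[:, -1])  # [0.5, 0.5, 0.5], the current value of mu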
Ancestors (in MRO)
- ParameterStacker
- sklearn.base.BaseEstimator
- sklearn.base.TransformerMixin
- builtins.object
Methods
def __init__(self, params)
Constructor.
Parameters
- params [list of Theano shared variables]: The parameters.
def __init__(self, params):
    """Constructor.

    Parameters
    ----------
    * `params` [list of Theano shared variables]:
        The parameters.
    """
    self.params = params
def transform(self, X, y=None)
Stack current parameter values as additional features.
Parameters
- X [array-like, shape=(n_samples, n_features)]: The samples.
Returns
- Xt [array, shape=(n_samples, n_features+len(params))]: The horizontal
  concatenation of X with the current parameter values, added as new columns.
def transform(self, X, y=None):
    """Stack current parameter values as additional features.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        The samples.

    Returns
    -------
    * `Xt` [array, shape=(n_samples, n_features+len(params))]:
        The horizontal concatenation of X with the current parameter
        values, added as new columns.
    """
    Xp = np.empty((len(X), len(self.params)))

    for i, p in enumerate(self.params):
        Xp[:, i] = p.eval()

    return np.hstack((X, Xp))
Instance variables
var params
class ParameterizedClassifier
Parameterize a Scikit-Learn classifier.
This wrapper can be used to learn a parameterized classification problem, where parameter values are automatically added as additional features.
class ParameterizedClassifier(_ParameterizedEstimator, ClassifierMixin):
    """Parameterize a Scikit-Learn classifier.

    This wrapper can be used to learn a parameterized classification
    problem, where parameter values are automatically added as additional
    features.
    """

    def predict_proba(self, X):
        """Predict the posterior probabilities of classification for X.

        Parameter values are automatically appended from the current state
        of the parameters if those are not provided with X.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features) or
               shape=(n_samples, n_features+len(params))]:
            The samples.

        Returns
        -------
        * `probas` [array, shape=(n_samples, n_classes)]:
            The predicted probabilities.
        """
        return self.estimator_.predict_proba(self._validate_X(X))
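An illustrative end-to-end sketch built on the helpers above (the base estimator and parameter values are assumptions, not from the original docs):

import theano
from sklearn.linear_model import LogisticRegression
from carl.distributions import Normal
from carl.learning import (ParameterizedClassifier,
                           make_parameterized_classification)

mu = theano.shared(0.0, name="mu")
X, y = make_parameterized_classification(
    Normal(mu=0.0, sigma=1.0), Normal(mu=mu, sigma=1.0),
    n_samples=1000, params=[(mu, [-1.0, 1.0])], random_state=0)

clf = ParameterizedClassifier(LogisticRegression(), [mu])
clf.fit(X, y)  # X already carries the mu column

mu.set_value(1.0)  # evaluate at mu=1; the parameter column is appended
print(clf.predict_proba(X[:5, :-1]))  # X passed without the mu column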
Ancestors (in MRO)
- ParameterizedClassifier
- carl.learning.parameterize._ParameterizedEstimator
- sklearn.base.BaseEstimator
- sklearn.base.ClassifierMixin
- builtins.object
Methods
def __init__(self, base_estimator, params)
Constructor.
Parameters
- base_estimator [BaseEstimator]: The estimator to parameterize.
- params [list of Theano shared variables]: The parameters.
def __init__(self, base_estimator, params):
    """Constructor.

    Parameters
    ----------
    * `base_estimator` [`BaseEstimator`]:
        The estimator to parameterize.

    * `params` [list of Theano shared variables]:
        The parameters.
    """
    self.base_estimator = base_estimator
    self.params = params
def fit(self, X, y)
Fit estimator on parameterized data.
Parameters
- X [array-like, shape=(n_samples, n_features+len(params))]: The samples,
  concatenated with the corresponding parameter values.
- y [array-like, shape=(n_samples,)]: The output values.
Returns
- self [object]: self.
def fit(self, X, y):
    """Fit estimator on parameterized data.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features+len(params))]:
        The samples, concatenated with the corresponding parameter values.

    * `y` [array-like, shape=(n_samples,)]:
        The output values.

    Returns
    -------
    * `self` [object]:
        `self`.
    """
    self.stacker_ = ParameterStacker(self.params)

    # XXX: this assumes that X is extended with parameters
    self.n_features_ = X.shape[1] - len(self.params)
    self.estimator_ = clone(self.base_estimator).fit(X, y)

    return self
def predict(self, X)
Predict the targets for X.
Parameter values are automatically appended from the current state
of the parameters if those are not provided with X.
Parameters
- X [array-like, shape=(n_samples, n_features) or shape=(n_samples,
  n_features+len(params))]: The samples.
Returns
- y [array, shape=(n_samples,)]: The predicted output values.
def predict(self, X):
    """Predict the targets for `X`.

    Parameter values are automatically appended from the current state
    of the parameters if those are not provided with `X`.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features) or
           shape=(n_samples, n_features+len(params))]:
        The samples.

    Returns
    -------
    * `y` [array, shape=(n_samples,)]:
        The predicted output values.
    """
    return self.estimator_.predict(self._validate_X(X))
def predict_proba(self, X)
Predict the posterior probabilities of classification for X.
Parameter values are automatically appended from the current state of the parameters if those are not provided with X.
Parameters
- X [array-like, shape=(n_samples, n_features) or shape=(n_samples,
  n_features+len(params))]: The samples.
Returns
- probas [array, shape=(n_samples, n_classes)]: The predicted probabilities.
def predict_proba(self, X):
    """Predict the posterior probabilities of classification for X.

    Parameter values are automatically appended from the current state
    of the parameters if those are not provided with X.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features) or
           shape=(n_samples, n_features+len(params))]:
        The samples.

    Returns
    -------
    * `probas` [array, shape=(n_samples, n_classes)]:
        The predicted probabilities.
    """
    return self.estimator_.predict_proba(self._validate_X(X))
class ParameterizedRegressor
Parameterize a Scikit-Learn regressor.
This wrapper can be used to learn a parameterized regression problem, where parameter values are automatically added as additional features.
class ParameterizedRegressor(_ParameterizedEstimator, RegressorMixin):
    """Parameterize a Scikit-Learn regressor.

    This wrapper can be used to learn a parameterized regression problem,
    where parameter values are automatically added as additional features.
    """
    pass
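A sketch with made-up data: the last column of the training matrix carries the parameter value, and at prediction time the current value of the shared variable is appended automatically.

import numpy as np
import theano
from sklearn.linear_model import LinearRegression
from carl.learning import ParameterizedRegressor

theta = theano.shared(2.0, name="theta")
rng = np.random.RandomState(0)
x = rng.rand(200, 1)
X = np.hstack((x, np.full((200, 1), theta.get_value())))  # features + theta
y = theta.get_value() * x.ravel() + rng.normal(scale=0.1, size=200)

reg = ParameterizedRegressor(LinearRegression(), [theta])
reg.fit(X, y)
print(reg.predict(x[:3]))  # theta appended from its current value (2.0)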
Ancestors (in MRO)
- ParameterizedRegressor
- carl.learning.parameterize._ParameterizedEstimator
- sklearn.base.BaseEstimator
- sklearn.base.RegressorMixin
- builtins.object
Methods
def __init__(self, base_estimator, params)
Constructor.
Parameters
- base_estimator [BaseEstimator]: The estimator to parameterize.
- params [list of Theano shared variables]: The parameters.
def __init__(self, base_estimator, params):
    """Constructor.

    Parameters
    ----------
    * `base_estimator` [`BaseEstimator`]:
        The estimator to parameterize.

    * `params` [list of Theano shared variables]:
        The parameters.
    """
    self.base_estimator = base_estimator
    self.params = params
def fit(self, X, y)
Fit estimator on parameterized data.
Parameters
- X [array-like, shape=(n_samples, n_features+len(params))]: The samples,
  concatenated with the corresponding parameter values.
- y [array-like, shape=(n_samples,)]: The output values.
Returns
- self [object]: self.
def fit(self, X, y):
    """Fit estimator on parameterized data.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features+len(params))]:
        The samples, concatenated with the corresponding parameter values.

    * `y` [array-like, shape=(n_samples,)]:
        The output values.

    Returns
    -------
    * `self` [object]:
        `self`.
    """
    self.stacker_ = ParameterStacker(self.params)

    # XXX: this assumes that X is extended with parameters
    self.n_features_ = X.shape[1] - len(self.params)
    self.estimator_ = clone(self.base_estimator).fit(X, y)

    return self
def predict(self, X)
Predict the targets for X.
Parameter values are automatically appended from the current state
of the parameters if those are not provided with X.
Parameters
- X [array-like, shape=(n_samples, n_features) or shape=(n_samples,
  n_features+len(params))]: The samples.
Returns
- y [array, shape=(n_samples,)]: The predicted output values.
def predict(self, X):
    """Predict the targets for `X`.

    Parameter values are automatically appended from the current state
    of the parameters if those are not provided with `X`.

    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features) or
           shape=(n_samples, n_features+len(params))]:
        The samples.

    Returns
    -------
    * `y` [array, shape=(n_samples,)]:
        The predicted output values.
    """
    return self.estimator_.predict(self._validate_X(X))