
carl.learning module

This module implements machine learning algorithms and utilities, complementary to Scikit-Learn.

"""
This module implements machine learning algorithms and utilities,
complementary to Scikit-Learn.
"""

# Carl is free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
# more details.

from .base import as_classifier
from .base import check_cv
from .calibration import CalibratedClassifierCV
from .parameterize import make_parameterized_classification
from .parameterize import ParameterStacker
from .parameterize import ParameterizedClassifier
from .parameterize import ParameterizedRegressor


__all__ = ("as_classifier",
           "check_cv",
           "CalibratedClassifierCV",
           "make_parameterized_classification",
           "ParameterStacker",
           "ParameterizedClassifier",
           "ParameterizedRegressor",)

Functions

def as_classifier(regressor)

Wrap a Scikit-Learn regressor into a binary classifier.

This function can be used to solve a binary classification problem as a regression problem, where output labels {0,1} are treated as real values. The wrapped regressor exhibits the classifier API, with the corresponding predict, predict_proba and score methods.

Parameters

  • regressor [RegressorMixin]: The regressor object.

Returns

  • clf [ClassifierMixin]: The wrapped regressor, but with a classifier API.
def as_classifier(regressor):
    """Wrap a Scikit-Learn regressor into a binary classifier.

    This function can be used to solve a binary classification problem as a
    regression problem, where output labels {0,1} are treated as real values.
    The wrapped regressor exhibits the classifier API, with the corresponding
    `predict`, `predict_proba` and `score` methods.

    Parameters
    ----------
    * `regressor` [`RegressorMixin`]:
        The regressor object.

    Returns
    -------
    * `clf` [`ClassifierMixin`]:
        The wrapped regressor, but with a classifier API.
    """
    class Wrapper(BaseEstimator, ClassifierMixin):
        def __init__(self, base_estimator):
            self.base_estimator = base_estimator

        def fit(self, X, y, **kwargs):
            # Check inputs
            X, y = check_X_y(X, y)

            # Convert y
            label_encoder = LabelEncoder()
            y = label_encoder.fit_transform(y).astype(float)

            if len(label_encoder.classes_) != 2:
                raise ValueError

            self.classes_ = label_encoder.classes_

            # Fit regressor
            self.regressor_ = clone(self.base_estimator).fit(X, y, **kwargs)

            return self

        def predict(self, X):
            return np.where(self.predict_proba(X)[:, 1] >= 0.5,
                            self.classes_[1],
                            self.classes_[0])

        def predict_proba(self, X):
            X = check_array(X)

            df = self.regressor_.predict(X)
            df = np.clip(df, 0., 1.)
            probas = np.zeros((len(X), 2))
            probas[:, 0] = 1. - df
            probas[:, 1] = df

            return probas

        def score(self, X, y):
            return self.regressor_.score(X, y)

    return Wrapper(regressor)
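
Example

A minimal usage sketch; the choice of MLPRegressor and the toy data are illustrative only, not part of the carl API:

import numpy as np
from sklearn.neural_network import MLPRegressor
from carl.learning import as_classifier

X = np.random.RandomState(0).rand(100, 2)
y = (X[:, 0] + X[:, 1] > 1.0).astype(int)  # binary labels {0, 1}

clf = as_classifier(MLPRegressor(max_iter=500))
clf.fit(X, y)

print(clf.predict_proba(X[:3]))  # regression outputs clipped to [0, 1]
print(clf.predict(X[:3]))        # thresholded at 0.5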

def check_cv(cv=3, X=None, y=None, classifier=False)

Input checker utility for building a cross-validator.

Parameters

  • cv [integer, cross-validation generator or an iterable, default=3]: Determines the cross-validation splitting strategy. Possible inputs for cv are:

    • integer, to specify the number of folds.
    • An object to be used as a cross-validation generator.
    • An iterable yielding train/test splits.

    For integer/None inputs, if classifier is True and y is either binary or multiclass, StratifiedKFold is used. In all other cases, KFold is used.

  • y [array-like, optional]: The target variable for supervised learning problems.

  • classifier [boolean, default=False]: Whether the task is a classification task, in which case stratified KFold will be used.

Returns

  • checked_cv [a cross-validator instance]: The return value is a cross-validator which generates the train/test splits via the split method.

Note

This method is backported from scikit-learn 0.18.

def check_cv(cv=3, X=None, y=None, classifier=False):
    """Input checker utility for building a cross-validator.

    Parameters
    ----------
    * `cv` [integer, cross-validation generator or an iterable, default=`3`]:
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

          - integer, to specify the number of folds.
          - An object to be used as a cross-validation generator.
          - An iterable yielding train/test splits.

        For integer/None inputs, if classifier is True and `y` is either
        binary or multiclass, `StratifiedKFold` is used. In all other
        cases, `KFold` is used.

    * `y` [array-like, optional]:
        The target variable for supervised learning problems.

    * `classifier` [boolean, default=`False`]:
        Whether the task is a classification task, in which case
        stratified `KFold` will be used.

    Returns
    -------
    * `checked_cv` [a cross-validator instance]:
        The return value is a cross-validator which generates the train/test
        splits via the `split` method.

    Note
    ----
    This method is backported from scikit-learn 0.18.
    """
    return sklearn_check_cv(cv, y=y, classifier=classifier)
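
Example

A short sketch of how the cv argument is resolved; the values below are illustrative:

import numpy as np
from carl.learning import check_cv

y = np.array([0, 1, 0, 1, 0, 1])

cv_clf = check_cv(3, y=y, classifier=True)   # StratifiedKFold for binary y
cv_any = check_cv(3, y=y, classifier=False)  # plain KFold otherwise

for train, test in cv_clf.split(np.zeros((6, 1)), y):
    print(train, test)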

def make_parameterized_classification(p0, p1, n_samples, params, random_state=None)

Generate parameterized classification data.

This function generates parameterized classification data, by enumerating all possible combinations of provided parameter values and producing samples in equal number from p0 and p1.

Parameters

  • p0 [DistributionMixin]: The distribution to draw samples from class 0.

  • p1 [DistributionMixin]: The distribution to draw samples from class 1.

  • n_samples [integer]: The total number of samples to generate.

  • params [list of pairs (theano shared variable, list of values) or list of theano shared variables]: The list of parameters and the corresponding values to generate samples for. If only a list of theano shared variables is given, then samples are generated using the current parameter values.

  • random_state [integer or RandomState object]: The random seed.

Returns

  • X [array, shape=(n_samples, n_features+len(params))]: The generated training data, as sample features and concatenated parameter values.

  • y [array, shape=(n_samples,)]: The labels.

def make_parameterized_classification(p0, p1, n_samples, params,
                                      random_state=None):
    """Generate parameterized classification data.

    This function generates parameterized classification data, by enumerating
    all possible combinations of provided parameter values and producing
    samples in equal number from `p0` and `p1`.

    Parameters
    ----------
    * `p0` [`DistributionMixin`]:
        The distribution to draw samples from class 0.

    * `p1` [`DistributionMixin`]:
        The distribution to draw samples from class 1.

    * `n_samples` [integer]:
        The total number of samples to generate.

    * `params` [list of pairs (theano shared variables, list of values) or
                list of theano shared variables]:
        The list of parameters and the corresponding values to generate
        samples for. If only a list of theano shared variables is given, then
        generate samples using the current parameter values.

    * `random_state` [integer or RandomState object]:
        The random seed.

    Returns
    -------
    * `X` [array, shape=(n_samples, n_features+len(params))]:
        The generated training data, as sample features and concatenated
        parameter values.

    * `y` [array, shape=(n_samples,)]:
        The labels.
    """
    rng = check_random_state(random_state)

    if not isinstance(params[0], tuple):
        X0 = p0.rvs(n_samples // 2, random_state=rng)
        X1 = p1.rvs(n_samples - (n_samples // 2), random_state=rng)
        X = ParameterStacker(params).transform(np.vstack((X0, X1)))
        y = np.zeros(n_samples)
        y[len(X0):] = 1

        return X, y

    elif isinstance(params[0], tuple):
        combinations = list(product(*[values for _, values in params]))

        all_X = []
        all_y = []

        for c in combinations:
            for i, v in enumerate(c):
                params[i][0].set_value(v)

            X, y = make_parameterized_classification(
                p0, p1,
                n_samples // len(combinations),
                [p for p, _ in params],
                random_state=rng)

            all_X.append(X)
            all_y.append(y)

        X = np.vstack(all_X)
        y = np.concatenate(all_y)

        return X, y

    else:
        raise ValueError
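
Example

A hedged sketch, assuming carl.distributions.Normal and a Theano shared variable as the parameter of interest; substitute the distributions you actually use:

import theano
from carl.distributions import Normal
from carl.learning import make_parameterized_classification

mu = theano.shared(0.0, name="mu")   # parameter of interest
p0 = Normal(mu=mu, sigma=1.0)        # parameterized distribution for class 0
p1 = Normal(mu=0.0, sigma=1.0)       # fixed reference for class 1

# Enumerate mu over three values; for each value, samples are drawn in equal
# numbers from p0 and p1, and the value of mu is appended as a feature column.
X, y = make_parameterized_classification(
    p0, p1, 30000, [(mu, [-1.0, 0.0, 1.0])], random_state=0)

print(X.shape)  # (30000, 2): one sample feature plus the stacked value of mu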

Classes

class CalibratedClassifierCV

Probability calibration.

With this class, the base_estimator is fit on the train set of the cross-validation generator and the test set is used for calibration. The probabilities for each of the folds are then averaged for prediction.

class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
    """Probability calibration.

    With this class, the `base_estimator` is fit on the train set of the
    cross-validation generator and the test set is used for calibration. The
    probabilities for each of the folds are then averaged for prediction.
    """

    def __init__(self, base_estimator, method="histogram", bins="auto",
                 interpolation=None, variable_width=False, cv=1):
        """Constructor.

        Parameters
        ----------
        * `base_estimator` [`ClassifierMixin`]:
            The classifier whose output decision function needs to be
            calibrated to offer more accurate predict_proba outputs. If
            `cv=prefit`, the classifier must have been fit already on data.

        * `method` [string]:
            The method to use for calibration. Supported methods include
            `"histogram"`, `"kde"`, `"isotonic"`, `"interpolated-isotonic"` and
            `"sigmoid"`.

        * `bins` [int, default="auto"]:
            The number of bins, if `method` is `"histogram"`.

        * `interpolation` [string, optional]:
            Specifies the kind of interpolation between bins as a string
            (`"linear"`, `"nearest"`, `"zero"`, `"slinear"`, `"quadratic"`,
            `"cubic"`), if `method` is `"histogram"`.

        * `variable_width` [boolean, optional]:
            If True, use equal-probability variable-width bins, if
            `method` is `"histogram"`.

        * `cv` [integer, cross-validation generator, iterable or `"prefit"`]:
            Determines the cross-validation splitting strategy.
            Possible inputs for cv are:

            - integer, to specify the number of folds.
            - An object to be used as a cross-validation generator.
            - An iterable yielding train/test splits.

            If `"prefit"` is passed, it is assumed that base_estimator has been
            fitted already and all data is used for calibration. If `cv=1`,
            the training data is used for both training and calibration.
        """
        self.base_estimator = base_estimator
        self.method = method
        self.bins = bins
        self.interpolation = interpolation
        self.variable_width = variable_width
        self.cv = cv

    def fit(self, X, y, sample_weight=None):
        """Fit the calibrated model.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            Training data.

        * `y` [array-like, shape=(n_samples,)]:
            Target values.

        Returns
        -------
        * `self` [object]:
            `self`.
        """
        # Check inputs
        X, y = check_X_y(X, y)

        # Convert y
        label_encoder = LabelEncoder()
        y = label_encoder.fit_transform(y).astype(float)

        if len(label_encoder.classes_) != 2:
            raise ValueError

        self.classes_ = label_encoder.classes_
        # Calibrator
        if self.method == "histogram":
            base_calibrator = HistogramCalibrator(
                bins=self.bins, interpolation=self.interpolation,
                variable_width=self.variable_width)
        elif self.method == "kde":
            base_calibrator = KernelDensityCalibrator()
        elif self.method == "isotonic":
            base_calibrator = IsotonicCalibrator()
        elif self.method == "interpolated-isotonic":
            base_calibrator = IsotonicCalibrator(interpolation=True)
        elif self.method == "sigmoid":
            base_calibrator = SigmoidCalibrator()
        else:
            base_calibrator = self.method
        # Fit
        if self.cv == "prefit" or self.cv == 1:
            # Classifier
            if self.cv == 1:
                clf = clone(self.base_estimator)

                if isinstance(clf, RegressorMixin):
                    clf = as_classifier(clf)

                if sample_weight is None:
                    clf.fit(X, y)
                else:
                    clf.fit(X, y, sample_weight=sample_weight)

            else:
                clf = self.base_estimator

            self.classifiers_ = [clf]

            # Calibrator
            calibrator = clone(base_calibrator)
            T = clf.predict_proba(X)[:, 1]

            if sample_weight is None:
                calibrator.fit(T, y)
            else:
                calibrator.fit(T, y, sample_weight=sample_weight)

            self.calibrators_ = [calibrator]

        else:
            self.classifiers_ = []
            self.calibrators_ = []

            cv = check_cv(self.cv, X=X, y=y, classifier=True)

            for train, calibrate in cv.split(X, y):
                # Classifier
                clf = clone(self.base_estimator)

                if isinstance(clf, RegressorMixin):
                    clf = as_classifier(clf)

                if sample_weight is None:
                    clf.fit(X[train], y[train])
                else:
                    clf.fit(X[train], y[train],
                            sample_weight=sample_weight[train])

                self.classifiers_.append(clf)

                # Calibrator
                calibrator = clone(base_calibrator)
                T = clf.predict_proba(X[calibrate])[:, 1]

                if sample_weight is None:
                    calibrator.fit(T, y[calibrate])
                else:
                    calibrator.fit(T, y[calibrate],
                                   sample_weight=sample_weight[calibrate])

                self.calibrators_.append(calibrator)

        return self

    def predict(self, X):
        """Predict the targets for `X`.

        Can be different from the predictions of the uncalibrated classifier.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.

        Returns
        -------
        * `y` [array, shape=(n_samples,)]:
            The predicted class.
        """
        return np.where(self.predict_proba(X)[:, 1] >= 0.5,
                        self.classes_[1],
                        self.classes_[0])

    def predict_proba(self, X):
        """Predict the posterior probabilities of classification for `X`.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.

        Returns
        -------
        * `probas` [array, shape=(n_samples, n_classes)]:
            The predicted probabilities.
        """
        p = np.zeros((len(X), 2))

        for clf, calibrator in zip(self.classifiers_, self.calibrators_):
            p[:, 1] += calibrator.predict(clf.predict_proba(X)[:, 1])

        p[:, 1] /= len(self.classifiers_)
        p[:, 0] = 1. - p[:, 1]

        return p

    def _clone(self):
        estimator = clone(self, original=True)
        if self.cv == "prefit":
            estimator.base_estimator = self.base_estimator

        return estimator
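
Example

A minimal usage sketch; the base estimator and toy data are illustrative only:

import numpy as np
from sklearn.tree import DecisionTreeClassifier
from carl.learning import CalibratedClassifierCV

rng = np.random.RandomState(0)
X = rng.rand(1000, 2)
y = (X[:, 0] > 0.5).astype(int)

clf = CalibratedClassifierCV(DecisionTreeClassifier(max_depth=3),
                             method="histogram", cv=3)
clf.fit(X, y)

print(clf.predict_proba(X[:5]))  # probabilities averaged over the 3 folds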

Ancestors (in MRO)

  • CalibratedClassifierCV
  • sklearn.base.BaseEstimator
  • sklearn.base.ClassifierMixin
  • builtins.object

Static methods

def __init__(self, base_estimator, method='histogram', bins='auto', interpolation=None, variable_width=False, cv=1)

Constructor.

Parameters

  • base_estimator [ClassifierMixin]: The classifier whose output decision function needs to be calibrated to offer more accurate predict_proba outputs. If cv=prefit, the classifier must have been fit already on data.

  • method [string]: The method to use for calibration. Supported methods include "histogram", "kde", "isotonic", "interpolated-isotonic" and "sigmoid".

  • bins [int, default="auto"]: The number of bins, if method is "histogram".

  • interpolation [string, optional]: Specifies the kind of interpolation between bins as a string ("linear", "nearest", "zero", "slinear", "quadratic", "cubic"), if method is "histogram".

  • variable_width [boolean, optional]: If True, use equal-probability variable-width bins, if method is "histogram".

  • cv [integer, cross-validation generator, iterable or "prefit"]: Determines the cross-validation splitting strategy. Possible inputs for cv are:

    • integer, to specify the number of folds.
    • An object to be used as a cross-validation generator.
    • An iterable yielding train/test splits.

    If "prefit" is passed, it is assumed that base_estimator has been fitted already and all data is used for calibration. If cv=1, the training data is used for both training and calibration.

def __init__(self, base_estimator, method="histogram", bins="auto",
             interpolation=None, variable_width=False, cv=1):
    """Constructor.
    Parameters
    ----------
    * `base_estimator` [`ClassifierMixin`]:
        The classifier whose output decision function needs to be
        calibrated to offer more accurate predict_proba outputs. If
        `cv=prefit`, the classifier must have been fit already on data.
    * `method` [string]:
        The method to use for calibration. Supported methods include
        `"histogram"`, `"kde"`, `"isotonic"`, `"interpolated-isotonic"` and
        `"sigmoid"`.
    * `bins` [int, default="auto"]:
        The number of bins, if `method` is `"histogram"`.
    * `interpolation` [string, optional]:
        Specifies the kind of interpolation between bins as a string
        (`"linear"`, `"nearest"`, `"zero"`, `"slinear"`, `"quadratic"`,
        `"cubic"`), if `method` is `"histogram"`.
    * `variable_width` [boolean, optional]:
        If True, use equal-probability variable-width bins, if
        `method` is `"histogram"`.
    * `cv` [integer, cross-validation generator, iterable or `"prefit"`]:
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:
        - integer, to specify the number of folds.
        - An object to be used as a cross-validation generator.
        - An iterable yielding train/test splits.
        If `"prefit"` is passed, it is assumed that base_estimator has been
        fitted already and all data is used for calibration. If `cv=1`,
        the training data is used for both training and calibration.
    """
    self.base_estimator = base_estimator
    self.method = method
    self.bins = bins
    self.interpolation = interpolation
    self.variable_width = variable_width
    self.cv = cv

def fit(self, X, y, sample_weight=None)

Fit the calibrated model.

Parameters

  • X [array-like, shape=(n_samples, n_features)]: Training data.

  • y [array-like, shape=(n_samples,)]: Target values.

Returns

  • self [object]: self.
def fit(self, X, y, sample_weight=None):
    """Fit the calibrated model.
    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        Training data.
    * `y` [array-like, shape=(n_samples,)]:
        Target values.
    Returns
    -------
    * `self` [object]:
        `self`.
    """
    # Check inputs
    X, y = check_X_y(X, y)
    # Convert y
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(y).astype(float)
    if len(label_encoder.classes_) != 2:
        raise ValueError
    self.classes_ = label_encoder.classes_
    # Calibrator
    if self.method == "histogram":
        base_calibrator = HistogramCalibrator(
            bins=self.bins, interpolation=self.interpolation,
            variable_width=self.variable_width)
    elif self.method == "kde":
        base_calibrator = KernelDensityCalibrator()
    elif self.method == "isotonic":
        base_calibrator = IsotonicCalibrator()
    elif self.method == "interpolated-isotonic":
        base_calibrator = IsotonicCalibrator(interpolation=True)
    elif self.method == "sigmoid":
        base_calibrator = SigmoidCalibrator()
    else:
        base_calibrator = self.method
    # Fit
    if self.cv == "prefit" or self.cv == 1:
        # Classifier
        if self.cv == 1:
            clf = clone(self.base_estimator)
            if isinstance(clf, RegressorMixin):
                clf = as_classifier(clf)
            if sample_weight is None:
                clf.fit(X, y)
            else:
                clf.fit(X, y, sample_weight=sample_weight)
        else:
            clf = self.base_estimator
        self.classifiers_ = [clf]
        # Calibrator
        calibrator = clone(base_calibrator)
        T = clf.predict_proba(X)[:, 1]
        if sample_weight is None:
            calibrator.fit(T, y)
        else:
            calibrator.fit(T, y, sample_weight=sample_weight)
        self.calibrators_ = [calibrator]
    else:
        self.classifiers_ = []
        self.calibrators_ = []
        cv = check_cv(self.cv, X=X, y=y, classifier=True)
        for train, calibrate in cv.split(X, y):
            # Classifier
            clf = clone(self.base_estimator)
            if isinstance(clf, RegressorMixin):
                clf = as_classifier(clf)
            if sample_weight is None:
                clf.fit(X[train], y[train])
            else:
                clf.fit(X[train], y[train],
                        sample_weight=sample_weight[train])
            self.classifiers_.append(clf)
            # Calibrator
            calibrator = clone(base_calibrator)
            T = clf.predict_proba(X[calibrate])[:, 1]
            if sample_weight is None:
                calibrator.fit(T, y[calibrate])
            else:
                calibrator.fit(T, y[calibrate],
                               sample_weight=sample_weight[calibrate])
            self.calibrators_.append(calibrator)
    return self

def predict(self, X)

Predict the targets for X.

Can be different from the predictions of the uncalibrated classifier.

Parameters

  • X [array-like, shape=(n_samples, n_features)]: The samples.

Returns

  • y [array, shape=(n_samples,)]: The predicted class.
def predict(self, X):
    """Predict the targets for `X`.
    Can be different from the predictions of the uncalibrated classifier.
    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        The samples.
    Returns
    -------
    * `y` [array, shape=(n_samples,)]:
        The predicted class.
    """
    return np.where(self.predict_proba(X)[:, 1] >= 0.5,
                    self.classes_[1],
                    self.classes_[0])

def predict_proba(self, X)

Predict the posterior probabilities of classification for X.

Parameters

  • X [array-like, shape=(n_samples, n_features)]: The samples.

Returns

  • probas [array, shape=(n_samples, n_classes)]: The predicted probabilities.
def predict_proba(self, X):
    """Predict the posterior probabilities of classification for `X`.
    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        The samples.
    Returns
    -------
    * `probas` [array, shape=(n_samples, n_classes)]:
        The predicted probabilities.
    """
    p = np.zeros((len(X), 2))
    for clf, calibrator in zip(self.classifiers_, self.calibrators_):
        p[:, 1] += calibrator.predict(clf.predict_proba(X)[:, 1])
    p[:, 1] /= len(self.classifiers_)
    p[:, 0] = 1. - p[:, 1]
    return p

Instance variables

var base_estimator

var bins

var cv

var interpolation

var method

var variable_width

class ParameterStacker

Stack current parameter values as additional features.

class ParameterStacker(BaseEstimator, TransformerMixin):
    """Stack current parameter values as additional features."""

    def __init__(self, params):
        """Constructor.

        Parameters
        ----------
        * `params` [list of Theano shared variables]:
            The parameters.
        """
        self.params = params

    def transform(self, X, y=None):
        """Stack current parameter values as additional features.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.

        Returns
        -------
        * `Xt` [array, shape=(n_samples, n_features+len(params))]:
            The horizontal concatenation of X with the current parameter
            values, added as new columns.
        """
        Xp = np.empty((len(X), len(self.params)))

        for i, p in enumerate(self.params):
            Xp[:, i] = p.eval()

        return np.hstack((X, Xp))
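
Example

A short sketch; the shared variables below are illustrative:

import numpy as np
import theano
from carl.learning import ParameterStacker

alpha = theano.shared(0.5, name="alpha")
beta = theano.shared(-1.0, name="beta")

X = np.zeros((3, 2))
Xt = ParameterStacker([alpha, beta]).transform(X)
print(Xt.shape)  # (3, 4): the current values of alpha and beta are appended
print(Xt[0])     # [ 0.   0.   0.5 -1. ]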

Ancestors (in MRO)

  • ParameterStacker
  • sklearn.base.BaseEstimator
  • sklearn.base.TransformerMixin
  • builtins.object

Static methods

def __init__(self, params)

Constructor.

Parameters

  • params [list of Theano shared variables]: The parameters.
def __init__(self, params):
    """Constructor.
    Parameters
    ----------
    * `params` [list of Theano shared variables]:
        The parameters.
    """
    self.params = params

def transform(self, X, y=None)

Stack current parameter values as additional features.

Parameters

  • X [array-like, shape=(n_samples, n_features)]: The samples.

Returns

  • Xt [array, shape=(n_samples, n_features+len(params))]: The horizontal concatenation of X with the current parameter values, added as new columns.
def transform(self, X, y=None):
    """Stack current parameter values as additional features.
    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features)]:
        The samples.
    Returns
    -------
    * `Xt` [array, shape=(n_samples, n_features+len(params))]:
        The horizontal concatenation of X with the current parameter
        values, added as new columns.
    """
    Xp = np.empty((len(X), len(self.params)))
    for i, p in enumerate(self.params):
        Xp[:, i] = p.eval()
    return np.hstack((X, Xp))

Instance variables

var params

class ParameterizedClassifier

Parameterize a Scikit-Learn classifier.

This wrapper can be used to learn a parameterized classification problem, where parameter values are automatically added as additional features.

class ParameterizedClassifier(_ParameterizedEstimator, ClassifierMixin):
    """Parameterize a Scikit-Learn classifier.

    This wrapper can be used to learn a parameterized classification problem,
    where parameter values are automatically added as additional features.
    """

    def predict_proba(self, X):
        """Predict the posterior probabilities of classification for X.

        Parameter values are automatically appended from the current state
        of the parameters if those are not provided with X.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features) or
                           shape=(n_samples, n_features+len(params))]:
            The samples.

        Returns
        -------
        * `probas` [array, shape=(n_samples, n_classes)]:
            The predicted probabilities.
        """
        return self.estimator_.predict_proba(self._validate_X(X))

Ancestors (in MRO)

  • ParameterizedClassifier
  • carl.learning.parameterize._ParameterizedEstimator
  • sklearn.base.BaseEstimator
  • sklearn.base.ClassifierMixin
  • builtins.object

Static methods

def __init__(self, base_estimator, params)

Constructor.

Parameters

  • base_estimator [BaseEstimator]: The estimator to parameterize.

  • params [list of Theano shared variables]: The parameters.

def __init__(self, base_estimator, params):
    """Constructor.
    Parameters
    ----------
    * `base_estimator` [`BaseEstimator`]:
        The estimator to parameterize.
    * `params` [list of Theano shared variables]:
        The parameters.
    """
    self.base_estimator = base_estimator
    self.params = params

def fit(self, X, y)

Fit estimator on parameterized data.

Parameters

  • X [array-like, shape=(n_samples, n_features+len(params))]: The samples, concatenated with the corresponding parameter values.

  • y [array-like, shape=(n_samples,)]: The output values.

Returns

  • self [object]: self.
def fit(self, X, y):
    """Fit estimator on parameterized data.
    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features+len(params))]:
        The samples, concatenated with the corresponding parameter values.
    * `y` [array-like, shape=(n_samples,)]:
        The output values.
    Returns
    -------
    * `self` [object]:
        `self`.
    """
    self.stacker_ = ParameterStacker(self.params)
    # XXX: this assumes that X is extended with parameters
    self.n_features_ = X.shape[1] - len(self.params)
    self.estimator_ = clone(self.base_estimator).fit(X, y)
    return self

def predict(self, X)

Predict the targets for X.

Parameter values are automatically appended from the current state of the parameters if those are not provided with X.

Parameters

  • X [array-like, shape=(n_samples, n_features) or shape=(n_samples, n_features+len(params))]: The samples.

Returns

  • y [array, shape=(n_samples,)]: The predicted output values.
def predict(self, X):
    """Predict the targets for `X`.
    Parameter values are automatically appended from the current state
    of the parameters if those are not provided with `X`.
    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features) or
                       shape=(n_samples, n_features+len(params))]:
        The samples.
    Returns
    -------
    * `y` [array, shape=(n_samples,)]:
        The predicted output values.
    """
    return self.estimator_.predict(self._validate_X(X))

def predict_proba(self, X)

Predict the posterior probabilities of classification for X.

Parameter values are automatically appended from the current state of the parameters if those are not provided with X.

Parameters

  • X [array-like, shape=(n_samples, n_features) or shape=(n_samples, n_features+len(params))]: The samples.

Returns

  • probas [array, shape=(n_samples, n_classes)]: The predicted probabilities.
def predict_proba(self, X):
    """Predict the posterior probabilities of classification for X.
    Parameter values are automatically appended from the current state
    of the parameters if those are not provided with X.
    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features) or
                       shape=(n_samples, n_features+len(params))]:
        The samples.
    Returns
    -------
    * `probas` [array, shape=(n_samples, n_classes)]:
        The predicted probabilities.
    """
    return self.estimator_.predict_proba(self._validate_X(X))

class ParameterizedRegressor

Parameterize a Scikit-Learn regressor.

This wrapper can be used to learn a parameterized regression problem, where parameter values are automatically added as additional features.

class ParameterizedRegressor(_ParameterizedEstimator, RegressorMixin):
    """Parameterize a Scikit-Learn regressor.

    This wrapper can be used to learn a parameterized regression problem,
    where parameter values are automatically added as additional features.
    """

    pass
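
Example

A brief sketch mirroring the classifier case: the regressor is fit on samples whose last column holds the parameter value. The estimator and data below are illustrative:

import numpy as np
import theano
from sklearn.linear_model import LinearRegression
from carl.learning import ParameterizedRegressor

alpha = theano.shared(1.0, name="alpha")

rng = np.random.RandomState(0)
X = rng.rand(200, 1)
Xp = np.hstack([X, np.full((200, 1), alpha.eval())])  # feature + parameter
y = alpha.eval() * X[:, 0]

reg = ParameterizedRegressor(LinearRegression(), [alpha])
reg.fit(Xp, y)

# At prediction time the current value of alpha is appended automatically
# when only the raw feature column is provided.
print(reg.predict(X[:3]))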

Ancestors (in MRO)

  • ParameterizedRegressor
  • carl.learning.parameterize._ParameterizedEstimator
  • sklearn.base.BaseEstimator
  • sklearn.base.RegressorMixin
  • builtins.object

Static methods

def __init__(self, base_estimator, params)

Constructor.

Parameters

  • base_estimator [BaseEstimator]: The estimator to parameterize.

  • params [list of Theano shared variables]: The parameters.

def __init__(self, base_estimator, params):
    """Constructor.
    Parameters
    ----------
    * `base_estimator` [`BaseEstimator`]:
        The estimator to parameterize.
    * `params` [list of Theano shared variables]:
        The parameters.
    """
    self.base_estimator = base_estimator
    self.params = params

def fit(self, X, y)

Fit estimator on parameterized data.

Parameters

  • X [array-like, shape=(n_samples, n_features+len(params))]: The samples, concatenated with the corresponding parameter values.

  • y [array-like, shape=(n_samples,)]: The output values.

Returns

  • self [object]: self.
def fit(self, X, y):
    """Fit estimator on parameterized data.
    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features+len(params))]:
        The samples, concatenated with the corresponding parameter values.
    * `y` [array-like, shape=(n_samples,)]:
        The output values.
    Returns
    -------
    * `self` [object]:
        `self`.
    """
    self.stacker_ = ParameterStacker(self.params)
    # XXX: this assumes that X is extended with parameters
    self.n_features_ = X.shape[1] - len(self.params)
    self.estimator_ = clone(self.base_estimator).fit(X, y)
    return self

def predict(self, X)

Predict the targets for X.

Parameter values are automatically appended from the current state of the parameters if those are not provided with X.

Parameters

  • X [array-like, shape=(n_samples, n_features) or shape=(n_samples, n_features+len(params))]: The samples.

Returns

  • y [array, shape=(n_samples,)]: The predicted output values.
def predict(self, X):
    """Predict the targets for `X`.
    Parameter values are automatically appended from the current state
    of the parameters if those are not provided with `X`.
    Parameters
    ----------
    * `X` [array-like, shape=(n_samples, n_features) or
                       shape=(n_samples, n_features+len(params))]:
        The samples.
    Returns
    -------
    * `y` [array, shape=(n_samples,)]:
        The predicted output values.
    """
    return self.estimator_.predict(self._validate_X(X))