carl.ratios module
This module implements density ratio estimators.
"""This module implements density ratio estimators.""" # Carl is free software; you can redistribute it and/or modify it # under the terms of the Revised BSD License; see LICENSE file for # more details. from .base import DensityRatioMixin from .base import KnownDensityRatio from .base import InverseRatio from .base import DecomposedRatio from .classifier import ClassifierRatio __all__ = ("DensityRatioMixin", "KnownDensityRatio", "InverseRatio", "DecomposedRatio", "ClassifierRatio")
Classes
class ClassifierRatio
Classifier-based density ratio estimator.
This class approximates a density ratio `r(x) = p0(x) / p1(x)` as
`s(x) / (1 - s(x))`, where `s` is a classifier trained to distinguish
samples `x ~ p0` from samples `x ~ p1`, and where `s(x)` is the
classifier's approximation of the probability `p0(x) / (p0(x) + p1(x))`.
Indeed, if `s(x)` approximates `p0(x) / (p0(x) + p1(x))`, then `1 - s(x)`
approximates `p1(x) / (p0(x) + p1(x))`, and the odds `s(x) / (1 - s(x))`
recover `p0(x) / p1(x)`.
This class can be used in the likelihood-free setup, i.e. either
- with known data `X` drawn from `p0` and `p1`, or
- with generators `p0` and `p1` implementing sampling through `rvs` (see the usage sketch below).
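For concreteness, here is a minimal usage sketch of the distribution-based mode. `carl.distributions.Normal` is an assumption used for illustration (any object exposing `rvs` works), and any scikit-learn classifier can serve as `base_estimator`:

import numpy as np
from sklearn.linear_model import LogisticRegression

from carl.distributions import Normal  # assumed available; any rvs-capable object works
from carl.ratios import ClassifierRatio

# Two hypothesis distributions p0 and p1
p0 = Normal(mu=0.0, sigma=1.0)
p1 = Normal(mu=0.5, sigma=1.0)

# Fit from generated samples: half drawn from p0 (y=0), half from p1 (y=1)
ratio = ClassifierRatio(base_estimator=LogisticRegression(), random_state=0)
ratio.fit(numerator=p0, denominator=p1, n_samples=10000)

# Evaluate r(x) = p0(x) / p1(x) on new points
X = np.linspace(-3, 3, 5).reshape(-1, 1)
print(ratio.predict(X))            # ratio estimates
print(ratio.predict(X, log=True))  # log-ratio estimates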
class ClassifierRatio(BaseEstimator, DensityRatioMixin):
    """Classifier-based density ratio estimator.

    This class approximates a density ratio `r(x) = p0(x) / p1(x)` as
    `s(x) / 1 - s(x)`, where `s` is a classifier trained to distinguish
    samples `x ~ p0` from samples `x ~ p1`, and where `s(x)` is the
    classifier approximate of the probability `p0(x) / (p0(x) + p1(x))`.

    This class can be used in the likelihood-free setup, i.e. either
    - with known data `X` drawn from `p0` and `p1`, or
    - with generators `p0` and `p1` implementing sampling through `rvs`.
    """

    def __init__(self, base_estimator, random_state=None):
        """Constructor.

        Parameters
        ----------
        * `base_estimator` [`BaseEstimator`]:
            A scikit-learn classifier or regressor.
        * `random_state` [integer or RandomState object]:
            The random seed.
        """
        self.base_estimator = base_estimator
        self.random_state = random_state

    def fit(self, X=None, y=None, sample_weight=None,
            numerator=None, denominator=None, n_samples=None, **kwargs):
        """Fit the density ratio estimator.

        The density ratio estimator `r(x) = p0(x) / p1(x)` can be fit either
        - from data, using `fit(X, y)` or
        - from distributions, using
          `fit(numerator=p0, denominator=p1, n_samples=N)`

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features), optional]:
            Training data.
        * `y` [array-like, shape=(n_samples,), optional]:
            Labels. Samples labeled with `y=0` correspond to data from the
            numerator distribution, while samples labeled with `y=1`
            correspond data from the denominator distribution.
        * `sample_weight` [array-like, shape=(n_samples,), optional]:
            The sample weights.
        * `numerator` [`DistributionMixin`, optional]:
            The numerator distribution `p0`, if `X` and `y` are not provided.
            This object is required to implement sampling through the `rvs`
            method.
        * `denominator` [`DistributionMixin`, optional]:
            The denominator distribution `p1`, if `X` and `y` are not
            provided. This object is required to implement sampling through
            the `rvs` method.
        * `n_samples` [integer, optional]
            The total number of samples to draw from the numerator and
            denominator distributions, if `X` and `y` are not provided.

        Returns
        -------
        * `self` [object]:
            `self`.
        """
        # Check for identity
        self.identity_ = (numerator is not None) and (numerator is denominator)

        if self.identity_:
            return self

        # Build training data
        rng = check_random_state(self.random_state)

        if (numerator is not None and denominator is not None and
                n_samples is not None):
            X = np.vstack(
                (numerator.rvs(n_samples // 2, random_state=rng, **kwargs),
                 denominator.rvs(n_samples - (n_samples // 2),
                                 random_state=rng, **kwargs)))
            y = np.zeros(n_samples, dtype=np.int)
            y[n_samples // 2:] = 1
            sample_weight = None

        elif X is not None and y is not None:
            if sample_weight is None:
                n_num = (y == 0).sum()
                n_den = (y == 1).sum()

                if n_num != n_den:
                    sample_weight = np.ones(len(y), dtype=np.float)
                    sample_weight[y == 1] *= 1.0 * n_num / n_den
                else:
                    sample_weight = None

        else:
            raise ValueError

        # Fit base estimator
        clf = clone(self.base_estimator)

        if isinstance(clf, RegressorMixin):
            clf = as_classifier(clf)

        if sample_weight is None:
            self.classifier_ = clf.fit(X, y)
        else:
            self.classifier_ = clf.fit(X, y, sample_weight=sample_weight)

        return self

    def predict(self, X, log=False, **kwargs):
        """Predict the density ratio `r(x_i)` for all `x_i` in `X`.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.
        * `log` [boolean, default=False]:
            If true, return the log-ratio `log r(x) = log(p0(x)) - log(p1(x))`.

        Returns
        -------
        * `r` [array, shape=(n_samples,)]:
            The predicted ratio `r(X)`.
        """
        if self.identity_:
            if log:
                return np.zeros(len(X))
            else:
                return np.ones(len(X))

        else:
            p = self.classifier_.predict_proba(X)

            if log:
                return np.log(p[:, 0]) - np.log(p[:, 1])
            else:
                return np.divide(p[:, 0], p[:, 1])
Ancestors (in MRO)
- ClassifierRatio
- sklearn.base.BaseEstimator
- DensityRatioMixin
- builtins.object
Methods
def __init__(
self, base_estimator, random_state=None)
Constructor.
Parameters
- `base_estimator` [`BaseEstimator`]: A scikit-learn classifier or regressor.
- `random_state` [integer or RandomState object]: The random seed.
def fit(
self, X=None, y=None, sample_weight=None, numerator=None, denominator=None, n_samples=None, **kwargs)
Fit the density ratio estimator.
The density ratio estimator r(x) = p0(x) / p1(x) can be fit either
- from data, using `fit(X, y)`, or
- from distributions, using `fit(numerator=p0, denominator=p1, n_samples=N)`.
Parameters
- `X` [array-like, shape=(n_samples, n_features), optional]: Training data.
- `y` [array-like, shape=(n_samples,), optional]: Labels. Samples labeled with `y=0` correspond to data from the numerator distribution, while samples labeled with `y=1` correspond to data from the denominator distribution.
- `sample_weight` [array-like, shape=(n_samples,), optional]: The sample weights.
- `numerator` [`DistributionMixin`, optional]: The numerator distribution `p0`, if `X` and `y` are not provided. This object is required to implement sampling through the `rvs` method.
- `denominator` [`DistributionMixin`, optional]: The denominator distribution `p1`, if `X` and `y` are not provided. This object is required to implement sampling through the `rvs` method.
- `n_samples` [integer, optional]: The total number of samples to draw from the numerator and denominator distributions, if `X` and `y` are not provided.
Returns
- `self` [object]: `self`.
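As a hedged sketch of the data-based mode with unbalanced classes: per the implementation, when `sample_weight` is omitted and the class counts differ, the denominator samples (`y=1`) are reweighted by `n_num / n_den` so that both classes carry equal total weight. The arrays below are illustrative:

import numpy as np
from sklearn.linear_model import LogisticRegression
from carl.ratios import ClassifierRatio

rng = np.random.RandomState(0)

# 3000 samples from the numerator (y=0) and 1000 from the denominator (y=1)
X = np.vstack((rng.normal(0.0, 1.0, size=(3000, 1)),
               rng.normal(0.5, 1.0, size=(1000, 1))))
y = np.zeros(4000, dtype=int)
y[3000:] = 1

ratio = ClassifierRatio(base_estimator=LogisticRegression())
ratio.fit(X, y)  # denominator samples internally weighted by 3000 / 1000 = 3

print(ratio.predict(X[:5]))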
def nllr(
self, X, **kwargs)
Negative log-likelihood ratio.
This method is a shortcut for -ratio.predict(X, log=True).sum().
Parameters
- `X` [array-like, shape=(n_samples, n_features)]: The samples.
Returns
- `nllr` [float]: The negative log-likelihood ratio.
def predict(
self, X, log=False, **kwargs)
Predict the density ratio r(x_i) for all x_i in X.
Parameters
- `X` [array-like, shape=(n_samples, n_features)]: The samples.
- `log` [boolean, default=False]: If true, return the log-ratio `log r(x) = log(p0(x)) - log(p1(x))`.
Returns
- `r` [array, shape=(n_samples,)]: The predicted ratio `r(X)`.
def score(
self, X, y, finite_only=True, **kwargs)
Negative MSE between predicted and known ratios.
The higher, the better.
Parameters
- `X` [array-like, shape=(n_samples, n_features)]: The samples.
- `y` [array-like, shape=(n_samples,)]: The known ratios for `X`.
- `finite_only` [boolean, default=True]: Evaluate the MSE only over finite ratios.
Returns
- `score` [float]: The negative MSE.
Instance variables
var base_estimator
var random_state
class DecomposedRatio
Decompose a ratio of mixtures into a weighted sum of sub-ratios.
If the numerator `p0` and denominator `p1` distributions are known to be
mixtures `p0(x) = \sum_i w_i p0_i(x)` and `p1(x) = \sum_j v_j p1_j(x)`,
then this class can be used to decompose the ratio `r(x) = p0(x) / p1(x)`
into the equivalent form `r(x) = \sum_i w_i [\sum_j v_j r_ji(x)]^-1`, where
the `r_ji(x) = p1_j(x) / p0_i(x)` are sub-ratios. This usually allows
for more accurate estimates.
The given numerator and denominator distributions are expected to
follow the Mixture API.
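A minimal hedged sketch of the intended use; `carl.distributions.Mixture` and `Normal` are assumptions used for illustration, with `Mixture` exposing its parts through `components` and `compute_weights` as this class expects:

import numpy as np
from sklearn.linear_model import LogisticRegression

from carl.distributions import Mixture, Normal  # assumed available
from carl.ratios import ClassifierRatio, DecomposedRatio

# Numerator and denominator are both two-component mixtures
p0 = Mixture(components=[Normal(mu=0.0), Normal(mu=2.0)], weights=[0.5, 0.5])
p1 = Mixture(components=[Normal(mu=0.5), Normal(mu=2.5)], weights=[0.5, 0.5])

# Each sub-ratio r_ji = p1_j / p0_i gets its own classifier-based estimator
ratio = DecomposedRatio(ClassifierRatio(base_estimator=LogisticRegression()))

# With 2 x 2 components, each of the 4 sub-ratios is fit on 40000 // 4 samples
ratio.fit(numerator=p0, denominator=p1, n_samples=40000)

X = np.linspace(-2.0, 4.0, 5).reshape(-1, 1)
print(ratio.predict(X))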
class DecomposedRatio(DensityRatioMixin, BaseEstimator):
    """Decompose a ratio of mixtures into a weighted sum of sub-ratios.

    If numerator `p0` and denominator `p1` distributions are known to be
    mixtures `p0(x) = \sum_i w_i p0_i(x)` and `p1(x) = \sum_j w_j p1_j(x)`,
    then this class can be used to decompose the ratio `r(x) = p0(x) / p1(x)`
    into the equivalent form `r(x) = \sum_i [\sum_j r_ji(x)]^-1` where
    `r_ji(x)` are sub-ratios for `p1_j(x) / p0_i(x)`. This usually allows
    for more accurate estimates.

    The given numerator and denominator distributions are expected to
    follow the `Mixture` API.
    """

    def __init__(self, base_ratio):
        """Constructor.

        Parameters
        ----------
        * `base_ratio` [`DensityRatioMixin`]:
            The base ratio to use for modeling the sub-ratios `r_ji(x)`.
        """
        self.base_ratio = base_ratio

    def fit(self, X=None, y=None, numerator=None,
            denominator=None, n_samples=None, **kwargs):
        """Fit the decomposed density ratio estimator.

        The decomposed density ratio estimator can be fit only from
        distributions, using `fit(numerator=p0, denominator=p1, n_samples=N)`.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features), optional]:
            Not supported.
        * `y` [array-like, shape=(n_samples,), optional]:
            Not supported
        * `numerator` [`Mixture`, optional]:
            The numerator mixture `p0`.
        * `denominator` [`Mixture`, optional]:
            The denominator mixture `p1`.
        * `n_samples` [integer, optional]
            The total number of samples to draw from the numerator and
            denominator distributions. If numerator and denominator are made
            of `p` and `q` components, then `n_samples // (p * q)` samples
            are drawn per sub-ratio `r_ji(x)` to fit.

        Returns
        -------
        * `self` [object]:
            `self`.
        """
        self.identity_ = (numerator is not None) and (numerator is denominator)

        if self.identity_:
            return self

        if numerator is None or denominator is None or n_samples is None:
            raise ValueError

        self.ratios_ = {}
        self.ratios_map_ = {}
        self.numerator_ = numerator
        self.denominator_ = denominator

        n_samples_ij = n_samples // (len(numerator.components) *
                                     len(denominator.components))

        # XXX in case of identities or inverses, samples are thrown away!
        #     we should first check whether these cases exist, and then
        #     assign n_samples to each sub-ratio

        for i, p_i in enumerate(numerator.components):
            for j, p_j in enumerate(denominator.components):
                if (p_i, p_j) in self.ratios_map_:
                    ratio = InverseRatio(
                        self.ratios_[self.ratios_map_[(p_i, p_j)]])

                else:
                    ratio = clone(self.base_ratio)
                    ratio.fit(numerator=p_j, denominator=p_i,
                              n_samples=n_samples_ij)

                self.ratios_[(j, i)] = ratio
                self.ratios_map_[(p_j, p_i)] = (j, i)

        return self

    def predict(self, X, log=False, **kwargs):
        """Predict the density ratio `r(x_i)` for all `x_i` in `X`.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.
        * `log` [boolean, default=False]:
            If true, return the log-ratio `log r(x) = log(p0(x)) - log(p1(x))`.

        Returns
        -------
        * `r` [array, shape=(n_samples,)]:
            The predicted ratio `r(X)`.
        """
        if self.identity_:
            if log:
                return np.zeros(len(X))
            else:
                return np.ones(len(X))

        else:
            w_num = self.numerator_.compute_weights(**kwargs)
            w_den = self.denominator_.compute_weights(**kwargs)

            r = np.zeros(len(X))

            for i, w_i in enumerate(w_num):
                s = np.zeros(len(X))

                for j, w_j in enumerate(w_den):
                    s += w_j * self.ratios_[(j, i)].predict(X, **kwargs)

                r += w_i / s

            if log:
                return np.log(r)
            else:
                return r
Ancestors (in MRO)
- DecomposedRatio
- DensityRatioMixin
- sklearn.base.BaseEstimator
- builtins.object
Methods
def __init__(
self, base_ratio)
Constructor.
Parameters
- `base_ratio` [`DensityRatioMixin`]: The base ratio to use for modeling the sub-ratios `r_ji(x)`.
def fit(
self, X=None, y=None, numerator=None, denominator=None, n_samples=None, **kwargs)
Fit the decomposed density ratio estimator.
The decomposed density ratio estimator can be fit only from
distributions, using fit(numerator=p0, denominator=p1, n_samples=N).
Parameters
- `X` [array-like, shape=(n_samples, n_features), optional]: Not supported.
- `y` [array-like, shape=(n_samples,), optional]: Not supported.
- `numerator` [`Mixture`, optional]: The numerator mixture `p0`.
- `denominator` [`Mixture`, optional]: The denominator mixture `p1`.
- `n_samples` [integer, optional]: The total number of samples to draw from the numerator and denominator distributions. If the numerator and denominator are made of `p` and `q` components, then `n_samples // (p * q)` samples are drawn to fit each sub-ratio `r_ji(x)`. For example, with `p = 2`, `q = 3` and `n_samples=60000`, each of the 6 sub-ratios is fit on `60000 // 6 = 10000` samples.
Returns
- `self` [object]: `self`.
def nllr(
self, X, **kwargs)
Negative log-likelihood ratio.
This method is a shortcut for -ratio.predict(X, log=True).sum().
Parameters
- `X` [array-like, shape=(n_samples, n_features)]: The samples.
Returns
- `nllr` [float]: The negative log-likelihood ratio.
def predict(
self, X, log=False, **kwargs)
Predict the density ratio r(x_i) for all x_i in X.
Parameters
- `X` [array-like, shape=(n_samples, n_features)]: The samples.
- `log` [boolean, default=False]: If true, return the log-ratio `log r(x) = log(p0(x)) - log(p1(x))`.
Returns
- `r` [array, shape=(n_samples,)]: The predicted ratio `r(X)`.
def score(
self, X, y, finite_only=True, **kwargs)
Negative MSE between predicted and known ratios.
The higher, the better.
Parameters
- `X` [array-like, shape=(n_samples, n_features)]: The samples.
- `y` [array-like, shape=(n_samples,)]: The known ratios for `X`.
- `finite_only` [boolean, default=True]: Evaluate the MSE only over finite ratios.
Returns
- `score` [float]: The negative MSE.
Instance variables
var base_ratio
class DensityRatioMixin
Density ratio mixin.
This class defines the common API for density ratio estimators.
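To make the contract concrete, here is a hedged sketch (not part of the library) of a custom estimator that plugs into this API by implementing `fit` and `predict`; `nllr` and `score` are then inherited from the mixin. The histogram approach and all names below are illustrative assumptions:

import numpy as np
from carl.ratios import DensityRatioMixin

class HistogramRatio(DensityRatioMixin):
    """Toy 1-D ratio estimator based on normalized histograms (illustrative only)."""

    def __init__(self, bins=20, hist_range=(-5.0, 5.0)):
        self.bins = bins
        self.hist_range = hist_range

    def fit(self, X=None, y=None, numerator=None, denominator=None,
            n_samples=None, **kwargs):
        # Distribution-based mode only: draw half the samples from each side
        X0 = numerator.rvs(n_samples // 2)
        X1 = denominator.rvs(n_samples - n_samples // 2)
        self.h0_, self.edges_ = np.histogram(X0, bins=self.bins,
                                             range=self.hist_range, density=True)
        self.h1_, _ = np.histogram(X1, bins=self.bins,
                                   range=self.hist_range, density=True)
        return self

    def predict(self, X, log=False, **kwargs):
        # Look up the bin of each sample and take the ratio of bin densities
        idx = np.clip(np.digitize(X.ravel(), self.edges_) - 1, 0, self.bins - 1)
        r = self.h0_[idx] / self.h1_[idx]
        return np.log(r) if log else r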
class DensityRatioMixin:
    """Density ratio mixin.

    This class defines the common API for density ratio estimators.
    """

    def fit(self, X=None, y=None, numerator=None,
            denominator=None, n_samples=None, **kwargs):
        """Fit the density ratio estimator.

        The density ratio estimator `r(x) = p0(x) / p1(x)` can be fit either
        - from data, using `fit(X, y)` or
        - from distributions, using
          `fit(numerator=p0, denominator=p1, n_samples=N)`

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features), optional]:
            Training data.
        * `y` [array-like, shape=(n_samples,), optional]:
            Labels. Samples labeled with `y=0` correspond to data from the
            numerator distribution, while samples labeled with `y=1`
            correspond data from the denominator distribution.
        * `numerator` [`DistributionMixin`, optional]:
            The numerator distribution `p0`, if `X` and `y` are not provided.
        * `denominator` [`DistributionMixin`, optional]:
            The denominator distribution `p1`, if `X` and `y` are not
            provided.
        * `n_samples` [integer, optional]
            The total number of samples to draw from the numerator and
            denominator distributions, if `X` and `y` are not provided.

        Returns
        -------
        * `self` [object]:
            `self`.
        """
        return self

    def predict(self, X, log=False, **kwargs):
        """Predict the density ratio `r(x_i)` for all `x_i` in `X`.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.
        * `log` [boolean, default=False]:
            If true, return the log-ratio `log r(x) = log(p0(x)) - log(p1(x))`.

        Returns
        -------
        * `r` [array, shape=(n_samples,)]:
            The predicted ratio `r(X)`.
        """
        raise NotImplementedError

    def nllr(self, X, **kwargs):
        """Negative log-likelihood ratio.

        This method is a shortcut for `-ratio.predict(X, log=True).sum()`.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.

        Returns
        -------
        * `nllr` [float]:
            The negative log-likelihood ratio.
        """
        ratios = self.predict(X, log=True, **kwargs)
        mask = np.isfinite(ratios)

        if mask.sum() < len(ratios):
            warnings.warn("r(X) contains non-finite values.")

        return -np.sum(ratios[mask])

    def score(self, X, y, finite_only=True, **kwargs):
        """Negative MSE between predicted and known ratios.

        The higher, the better.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.
        * `y` [array-like, shape=(n_samples,)]:
            The known ratios for `X`.
        * `finite_only` [boolean, default=True]:
            Evaluate the MSE only over finite ratios.

        Returns
        -------
        * `score` [float]:
            The negative MSE.
        """
        ratios = self.predict(X, **kwargs)

        if finite_only:
            mask = np.isfinite(ratios)
            y = y[mask]
            ratios = ratios[mask]

        return -mean_squared_error(y, ratios)
Ancestors (in MRO)
- DensityRatioMixin
- builtins.object
Methods
def fit(
self, X=None, y=None, numerator=None, denominator=None, n_samples=None, **kwargs)
Fit the density ratio estimator.
The density ratio estimator r(x) = p0(x) / p1(x) can be fit either
- from data, using `fit(X, y)`, or
- from distributions, using `fit(numerator=p0, denominator=p1, n_samples=N)`.
Parameters
- `X` [array-like, shape=(n_samples, n_features), optional]: Training data.
- `y` [array-like, shape=(n_samples,), optional]: Labels. Samples labeled with `y=0` correspond to data from the numerator distribution, while samples labeled with `y=1` correspond to data from the denominator distribution.
- `numerator` [`DistributionMixin`, optional]: The numerator distribution `p0`, if `X` and `y` are not provided.
- `denominator` [`DistributionMixin`, optional]: The denominator distribution `p1`, if `X` and `y` are not provided.
- `n_samples` [integer, optional]: The total number of samples to draw from the numerator and denominator distributions, if `X` and `y` are not provided.
Returns
- `self` [object]: `self`.
def nllr(
self, X, **kwargs)
Negative log-likelihood ratio.
This method is a shortcut for -ratio.predict(X, log=True).sum().
Parameters
- `X` [array-like, shape=(n_samples, n_features)]: The samples.
Returns
- `nllr` [float]: The negative log-likelihood ratio.
def predict(
self, X, log=False, **kwargs)
Predict the density ratio r(x_i) for all x_i in X.
Parameters
- `X` [array-like, shape=(n_samples, n_features)]: The samples.
- `log` [boolean, default=False]: If true, return the log-ratio `log r(x) = log(p0(x)) - log(p1(x))`.
Returns
- `r` [array, shape=(n_samples,)]: The predicted ratio `r(X)`.
def score(
self, X, y, finite_only=True, **kwargs)
Negative MSE between predicted and known ratios.
The higher, the better.
Parameters
- `X` [array-like, shape=(n_samples, n_features)]: The samples.
- `y` [array-like, shape=(n_samples,)]: The known ratios for `X`.
- `finite_only` [boolean, default=True]: Evaluate the MSE only over finite ratios.
Returns
- `score` [float]: The negative MSE.
class InverseRatio
Invert a density ratio.
This class can be used to model the inverse 1 / r(x) = p1(x) / p0(x)
of a given base ratio r(x) = p0(x) / p1(x).
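A short hedged sketch: wrapping an already fitted ratio flips its numerator and denominator without refitting. `carl.distributions.Normal` is again an assumption used for illustration:

import numpy as np
from sklearn.linear_model import LogisticRegression

from carl.distributions import Normal  # assumed available
from carl.ratios import ClassifierRatio, InverseRatio

p0 = Normal(mu=0.0, sigma=1.0)
p1 = Normal(mu=0.5, sigma=1.0)

r = ClassifierRatio(base_estimator=LogisticRegression())
r.fit(numerator=p0, denominator=p1, n_samples=10000)

inv = InverseRatio(r)  # models p1(x) / p0(x); no further fitting needed

X = np.array([[0.0], [1.0]])
print(r.predict(X) * inv.predict(X))  # ~1 for every x, by construction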
class InverseRatio(DensityRatioMixin, BaseEstimator):
    """Inverse a density ratio.

    This class can be used to model the inverse `1 / r(x) = p1(x) / p0(x)`
    of a given base ratio `r(x) = p0(x) / p1(x)`.
    """

    def __init__(self, base_ratio):
        """Constructor.

        Parameters
        ----------
        * `base_ratio` [`DensityRatioMixin`]:
            The base ratio to inverse.
        """
        self.base_ratio = base_ratio

    def fit(self, X=None, y=None, numerator=None,
            denominator=None, n_samples=None, **kwargs):
        """Fit the inversed density ratio estimator.

        The inversed density ratio estimator `1 / r(x) = p1(x) / p0(x)` can be
        fit either
        - from data, using `fit(X, y)` or
        - from distributions, using
          `fit(numerator=p1, denominator=p0, n_samples=N)`

        Note that this object does not need to be fit if `base_ratio`
        is already fit.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features), optional]:
            Training data.
        * `y` [array-like, shape=(n_samples,), optional]:
            Labels. Samples labeled with `y=1` correspond to data from the
            numerator distribution, while samples labeled with `y=0`
            correspond data from the denominator distribution.
        * `numerator` [`DistributionMixin`, optional]:
            The numerator distribution `p1`, if `X` and `y` are not provided.
        * `denominator` [`DistributionMixin`, optional]:
            The denominator distribution `p0`, if `X` and `y` are not
            provided.
        * `n_samples` [integer, optional]
            The total number of samples to draw from the numerator and
            denominator distributions, if `X` and `y` are not provided.

        Returns
        -------
        * `self` [object]:
            `self`.
        """
        if X is not None and y is not None:
            self.base_ratio.fit(X=X, y=1 - y)
        else:
            self.base_ratio.fit(numerator=numerator,
                                denominator=denominator,
                                n_samples=n_samples)

        return self

    def predict(self, X, log=False, **kwargs):
        """Predict the inverse density ratio `1 / r(x_i)` for all `x_i` in `X`.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.
        * `log` [boolean, default=False]:
            If true, return the negative log-ratio
            `-log r(x) = log(p1(x)) - log(p0(x))`.

        Returns
        -------
        * `r` [array, shape=(n_samples,)]:
            The predicted inverse ratio `1 / r(X)`.
        """
        if log:
            return -self.base_ratio.predict(X, log=True, **kwargs)
        else:
            return 1. / self.base_ratio.predict(X, log=False, **kwargs)
Ancestors (in MRO)
- InverseRatio
- DensityRatioMixin
- sklearn.base.BaseEstimator
- builtins.object
Methods
def __init__(
self, base_ratio)
Constructor.
Parameters
- `base_ratio` [`DensityRatioMixin`]: The base ratio to invert.
def fit(
self, X=None, y=None, numerator=None, denominator=None, n_samples=None, **kwargs)
Fit the inverted density ratio estimator.
The inverted density ratio estimator `1 / r(x) = p1(x) / p0(x)` can be fit either
- from data, using `fit(X, y)`, or
- from distributions, using `fit(numerator=p1, denominator=p0, n_samples=N)`.
Note that this object does not need to be fit if base_ratio
is already fit.
Parameters
- `X` [array-like, shape=(n_samples, n_features), optional]: Training data.
- `y` [array-like, shape=(n_samples,), optional]: Labels. Samples labeled with `y=1` correspond to data from the numerator distribution, while samples labeled with `y=0` correspond to data from the denominator distribution.
- `numerator` [`DistributionMixin`, optional]: The numerator distribution `p1`, if `X` and `y` are not provided.
- `denominator` [`DistributionMixin`, optional]: The denominator distribution `p0`, if `X` and `y` are not provided.
- `n_samples` [integer, optional]: The total number of samples to draw from the numerator and denominator distributions, if `X` and `y` are not provided.
Returns
- `self` [object]: `self`.
def nllr(
self, X, **kwargs)
Negative log-likelihood ratio.
This method is a shortcut for -ratio.predict(X, log=True).sum().
Parameters
- `X` [array-like, shape=(n_samples, n_features)]: The samples.
Returns
- `nllr` [float]: The negative log-likelihood ratio.
def predict(
self, X, log=False, **kwargs)
Predict the inverse density ratio 1 / r(x_i) for all x_i in X.
Parameters
- `X` [array-like, shape=(n_samples, n_features)]: The samples.
- `log` [boolean, default=False]: If true, return the negative log-ratio `-log r(x) = log(p1(x)) - log(p0(x))`.
Returns
- `r` [array, shape=(n_samples,)]: The predicted inverse ratio `1 / r(X)`.
def score(
self, X, y, finite_only=True, **kwargs)
Negative MSE between predicted and known ratios.
The higher, the better.
Parameters
- `X` [array-like, shape=(n_samples, n_features)]: The samples.
- `y` [array-like, shape=(n_samples,)]: The known ratios for `X`.
- `finite_only` [boolean, default=True]: Evaluate the MSE only over finite ratios.
Returns
- `score` [float]: The negative MSE.
Instance variables
var base_ratio
class KnownDensityRatio
Density ratio for known densities p0 and p1.
This class cannot be used in the likelihood-free setup. It requires
numerator and denominator distributions to implement the pdf and nll
methods.
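A hedged sketch, assuming `carl.distributions.Normal` exposes exact `pdf` and `nll` methods as this class requires:

import numpy as np

from carl.distributions import Normal  # assumed to implement pdf and nll
from carl.ratios import KnownDensityRatio

p0 = Normal(mu=0.0, sigma=1.0)
p1 = Normal(mu=0.5, sigma=1.0)

exact = KnownDensityRatio(numerator=p0, denominator=p1)  # no fitting needed

X = np.linspace(-3, 3, 5).reshape(-1, 1)
print(exact.predict(X))            # exact ratio p0(X) / p1(X)
print(exact.predict(X, log=True))  # exact log-ratio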
class KnownDensityRatio(DensityRatioMixin, BaseEstimator):
    """Density ratio for known densities `p0` and `p1`.

    This class cannot be used in the likelihood-free setup. It requires
    numerator and denominator distributions to implement the `pdf` and `nll`
    methods.
    """

    def __init__(self, numerator, denominator):
        """Constructor.

        Parameters
        ----------
        * `numerator` [`DistributionMixin`]:
            The numerator distribution. This object is required to implement
            the `pdf` and `nll` methods.
        * `denominator` [`DistributionMixin`]:
            The denominator distribution. This object is required to
            implement the `pdf` and `nll` methods.
        """
        self.numerator = numerator
        self.denominator = denominator

    def fit(self, X=None, y=None, numerator=None,
            denominator=None, n_samples=None, **kwargs):
        """Do nothing.

        Returns
        -------
        * `self` [object]:
            `self`.
        """
        return self

    def predict(self, X, log=False, **kwargs):
        """Predict the density ratio `r(x_i)` for all `x_i` in `X`.

        Parameters
        ----------
        * `X` [array-like, shape=(n_samples, n_features)]:
            The samples.
        * `log` [boolean, default=False]:
            If true, return the log-ratio `log r(x) = log(p0(x)) - log(p1(x))`.

        Returns
        -------
        * `r` [array, shape=(n_samples,)]:
            The predicted ratio `r(X)`.
        """
        if not log:
            return self.numerator.pdf(X) / self.denominator.pdf(X)
        else:
            return -self.numerator.nll(X) + self.denominator.nll(X)
Ancestors (in MRO)
- KnownDensityRatio
- DensityRatioMixin
- sklearn.base.BaseEstimator
- builtins.object
Methods
def __init__(
self, numerator, denominator)
Constructor.
Parameters
- `numerator` [`DistributionMixin`]: The numerator distribution. This object is required to implement the `pdf` and `nll` methods.
- `denominator` [`DistributionMixin`]: The denominator distribution. This object is required to implement the `pdf` and `nll` methods.
def fit(
self, X=None, y=None, numerator=None, denominator=None, n_samples=None, **kwargs)
Do nothing.
Returns
- `self` [object]: `self`.
def nllr(
self, X, **kwargs)
Negative log-likelihood ratio.
This method is a shortcut for -ratio.predict(X, log=True).sum().
Parameters
- `X` [array-like, shape=(n_samples, n_features)]: The samples.
Returns
- `nllr` [float]: The negative log-likelihood ratio.
def predict(
self, X, log=False, **kwargs)
Predict the density ratio r(x_i) for all x_i in X.
Parameters
- `X` [array-like, shape=(n_samples, n_features)]: The samples.
- `log` [boolean, default=False]: If true, return the log-ratio `log r(x) = log(p0(x)) - log(p1(x))`.
Returns
- `r` [array, shape=(n_samples,)]: The predicted ratio `r(X)`.
def score(
self, X, y, finite_only=True, **kwargs)
Negative MSE between predicted and known ratios.
The higher, the better.
Parameters
- `X` [array-like, shape=(n_samples, n_features)]: The samples.
- `y` [array-like, shape=(n_samples,)]: The known ratios for `X`.
- `finite_only` [boolean, default=True]: Evaluate the MSE only over finite ratios.
Returns
- `score` [float]: The negative MSE.
Instance variables
var denominator
var numerator