# carl.ratios module

This module implements density ratio estimators.

# Carl is free software; you can redistribute it and/or modify it
# under the terms of the Revised BSD License; see LICENSE file for
# more details.

import warnings

import numpy as np

from .base import DensityRatioMixin
from .base import KnownDensityRatio
from .base import InverseRatio
from .base import DecomposedRatio
from .classifier import ClassifierRatio

# Public names of the carl.ratios module; each one is imported above.
__all__ = ("DensityRatioMixin",
"KnownDensityRatio",
"InverseRatio",
"DecomposedRatio",
"ClassifierRatio")


class ClassifierRatio

Classifier-based density ratio estimator.

This class approximates a density ratio r(x) = p0(x) / p1(x) as s(x) / (1 - s(x)), where s is a classifier trained to distinguish samples x ~ p0 from samples x ~ p1, and where s(x) is the classifier approximate of the probability p0(x) / (p0(x) + p1(x)).

This class can be used in the likelihood-free setup, i.e. either

• with known data X drawn from p0 and p1, or
• with generators p0 and p1 implementing sampling through rvs.
class ClassifierRatio(BaseEstimator, DensityRatioMixin):
    """Classifier-based density ratio estimator.

    This class approximates a density ratio r(x) = p0(x) / p1(x) as
    s(x) / (1 - s(x)), where s is a classifier trained to distinguish
    samples x ~ p0 from samples x ~ p1, and where s(x) is the
    classifier approximate of the probability p0(x) / (p0(x) + p1(x)).

    This class can be used in the likelihood-free setup, i.e. either

    - with known data X drawn from p0 and p1, or
    - with generators p0 and p1 implementing sampling through rvs.
    """

    def __init__(self, base_estimator, random_state=None):
        """Constructor.

        Parameters
        ----------
        * base_estimator [BaseEstimator]:
            A scikit-learn classifier or regressor.

        * random_state [integer or RandomState object]:
            The random seed.
        """
        self.base_estimator = base_estimator
        self.random_state = random_state

    def fit(self, X=None, y=None, sample_weight=None,
            numerator=None, denominator=None, n_samples=None, **kwargs):
        """Fit the density ratio estimator.

        The density ratio estimator r(x) = p0(x) / p1(x) can be fit either

        - from data, using fit(X, y) or
        - from distributions, using
          fit(numerator=p0, denominator=p1, n_samples=N)

        If numerator and denominator are the very same object, the ratio
        is the identity and nothing is trained.

        Parameters
        ----------
        * X [array-like, shape=(n_samples, n_features), optional]:
            Training data.

        * y [array-like, shape=(n_samples,), optional]:
            Labels. Samples labeled with y=0 correspond to data from the
            numerator distribution, while samples labeled with y=1
            correspond to data from the denominator distribution.

        * sample_weight [array-like, shape=(n_samples,), optional]:
            The sample weights. Ignored when fitting from distributions.
            When fitting from data without weights, samples with y=1 are
            reweighted by n_num / n_den if the two classes are unbalanced.

        * numerator [DistributionMixin, optional]:
            The numerator distribution p0, if X and y are not provided.
            This object is required to implement sampling through the rvs
            method.

        * denominator [DistributionMixin, optional]:
            The denominator distribution p1, if X and y are not provided.
            This object is required to implement sampling through the rvs
            method.

        * n_samples [integer, optional]
            The total number of samples to draw from the numerator and
            denominator distributions, if X and y are not provided.

        * kwargs:
            Extra keyword arguments forwarded to the rvs calls.

        Returns
        -------
        * self [object]:
            self.

        Raises
        ------
        * ValueError:
            If neither (X, y) nor (numerator, denominator, n_samples) is
            fully specified.
        """
        # Check for identity
        self.identity_ = (numerator is not None) and (numerator is denominator)

        if self.identity_:
            return self

        # Build training data
        rng = check_random_state(self.random_state)

        if (numerator is not None and denominator is not None and
                n_samples is not None):
            n_num = n_samples // 2
            X = np.vstack(
                (numerator.rvs(n_num, random_state=rng, **kwargs),
                 denominator.rvs(n_samples - n_num,
                                 random_state=rng, **kwargs)))
            # Builtin int: the np.int alias no longer exists in NumPy.
            y = np.zeros(n_samples, dtype=int)
            y[n_num:] = 1
            sample_weight = None

        elif X is not None and y is not None:
            if sample_weight is None:
                # Accept plain lists of labels as well as arrays.
                labels = np.asarray(y)
                n_num = (labels == 0).sum()
                n_den = (labels == 1).sum()

                if n_num != n_den:
                    # Rebalance so that both classes carry the same total
                    # weight.
                    sample_weight = np.ones(len(labels), dtype=float)
                    sample_weight[labels == 1] *= 1.0 * n_num / n_den

        else:
            raise ValueError(
                "Either (X, y) or (numerator, denominator, n_samples) "
                "must be provided.")

        # Fit base estimator
        clf = clone(self.base_estimator)

        if isinstance(clf, RegressorMixin):
            clf = as_classifier(clf)

        if sample_weight is None:
            self.classifier_ = clf.fit(X, y)
        else:
            self.classifier_ = clf.fit(X, y, sample_weight=sample_weight)

        return self

    def predict(self, X, log=False, **kwargs):
        """Predict the density ratio r(x_i) for all x_i in X.

        Parameters
        ----------
        * X [array-like, shape=(n_samples, n_features)]:
            The samples.

        * log [boolean, default=False]:
            If true, return the log-ratio log r(x) = log(p0(x)) - log(p1(x)).

        Returns
        -------
        * r [array, shape=(n_samples,)]:
            The predicted ratio r(X).
        """
        if self.identity_:
            # Same numerator and denominator: r(x) = 1 and log r(x) = 0.
            if log:
                return np.zeros(len(X))
            return np.ones(len(X))

        # Column 0 is the numerator class (y=0), column 1 the denominator.
        p = self.classifier_.predict_proba(X)

        if log:
            return np.log(p[:, 0]) - np.log(p[:, 1])
        return np.divide(p[:, 0], p[:, 1])


def fit(self, X=None, y=None, sample_weight=None,
        numerator=None, denominator=None, n_samples=None, **kwargs):
    """Fit the density ratio estimator.

    The density ratio estimator r(x) = p0(x) / p1(x) can be fit either

    - from data, using fit(X, y) or
    - from distributions, using
      fit(numerator=p0, denominator=p1, n_samples=N)

    If numerator and denominator are the very same object, the ratio is
    the identity and nothing is trained.

    Parameters
    ----------
    * X [array-like, shape=(n_samples, n_features), optional]:
        Training data.

    * y [array-like, shape=(n_samples,), optional]:
        Labels. Samples labeled with y=0 correspond to data from the
        numerator distribution, while samples labeled with y=1 correspond
        to data from the denominator distribution.

    * sample_weight [array-like, shape=(n_samples,), optional]:
        The sample weights. Ignored when fitting from distributions.
        When fitting from data without weights, samples with y=1 are
        reweighted by n_num / n_den if the two classes are unbalanced.

    * numerator [DistributionMixin, optional]:
        The numerator distribution p0, if X and y are not provided.
        This object is required to implement sampling through the rvs
        method.

    * denominator [DistributionMixin, optional]:
        The denominator distribution p1, if X and y are not provided.
        This object is required to implement sampling through the rvs
        method.

    * n_samples [integer, optional]
        The total number of samples to draw from the numerator and
        denominator distributions, if X and y are not provided.

    * kwargs:
        Extra keyword arguments forwarded to the rvs calls.

    Returns
    -------
    * self [object]:
        self.

    Raises
    ------
    * ValueError:
        If neither (X, y) nor (numerator, denominator, n_samples) is
        fully specified.
    """
    # Check for identity
    self.identity_ = (numerator is not None) and (numerator is denominator)
    if self.identity_:
        return self

    # Build training data
    rng = check_random_state(self.random_state)

    if (numerator is not None and denominator is not None and
            n_samples is not None):
        n_num = n_samples // 2
        X = np.vstack(
            (numerator.rvs(n_num, random_state=rng, **kwargs),
             denominator.rvs(n_samples - n_num,
                             random_state=rng, **kwargs)))
        # Builtin int: the np.int alias no longer exists in NumPy.
        y = np.zeros(n_samples, dtype=int)
        y[n_num:] = 1
        sample_weight = None

    elif X is not None and y is not None:
        if sample_weight is None:
            # Accept plain lists of labels as well as arrays.
            labels = np.asarray(y)
            n_num = (labels == 0).sum()
            n_den = (labels == 1).sum()

            if n_num != n_den:
                # Rebalance so that both classes carry the same total weight.
                sample_weight = np.ones(len(labels), dtype=float)
                sample_weight[labels == 1] *= 1.0 * n_num / n_den

    else:
        raise ValueError(
            "Either (X, y) or (numerator, denominator, n_samples) "
            "must be provided.")

    # Fit base estimator
    clf = clone(self.base_estimator)
    if isinstance(clf, RegressorMixin):
        clf = as_classifier(clf)

    if sample_weight is None:
        self.classifier_ = clf.fit(X, y)
    else:
        self.classifier_ = clf.fit(X, y, sample_weight=sample_weight)

    return self


def nllr(self, X, **kwargs):
    """Negative log-likelihood ratio.

    This method is a shortcut for -ratio.predict(X, log=True).sum(),
    computed over the samples whose log-ratio is finite.

    Parameters
    ----------
    * X [array-like, shape=(n_samples, n_features)]:
        The samples.

    Returns
    -------
    * nllr [float]:
        The negative log-likelihood ratio.
    """
    ratios = np.asarray(self.predict(X, log=True, **kwargs))

    # Non-finite log-ratios (inf, -inf, nan) would poison the sum, so they
    # are reported through a warning and left out.
    mask = np.isfinite(ratios)

    if mask.sum() < len(ratios):
        warnings.warn("r(X) contains non-finite values.")

    return -np.sum(ratios[mask])


def predict(self, X, log=False, **kwargs):
    """Evaluate the density ratio r(x_i) on every sample x_i of X.

    Parameters
    ----------
    * X [array-like, shape=(n_samples, n_features)]:
        The samples.

    * log [boolean, default=False]:
        If true, return the log-ratio log r(x) = log(p0(x)) - log(p1(x)).

    Returns
    -------
    * r [array, shape=(n_samples,)]:
        The predicted ratio r(X).
    """
    if self.identity_:
        # Identical numerator and denominator: r = 1, log r = 0.
        return np.zeros(len(X)) if log else np.ones(len(X))

    proba = self.classifier_.predict_proba(X)
    p_num = proba[:, 0]
    p_den = proba[:, 1]

    if log:
        return np.log(p_num) - np.log(p_den)
    return np.divide(p_num, p_den)


def score(self, X, y, finite_only=True, **kwargs):
    """Negative MSE between predicted and known ratios.

    The higher, the better.

    Parameters
    ----------
    * X [array-like, shape=(n_samples, n_features)]:
        The samples.

    * y [array-like, shape=(n_samples,)]:
        The known ratios for X.

    * finite_only [boolean, default=True]:
        Evaluate the MSE only over finite ratios.

    Returns
    -------
    * score [float]:
        The negative MSE.
    """
    ratios = self.predict(X, **kwargs)

    if finite_only:
        # Drop the samples whose predicted ratio is inf or nan, together
        # with their reference values, before computing the error.
        ratios = np.asarray(ratios)
        mask = np.isfinite(ratios)
        y = np.asarray(y)[mask]
        ratios = ratios[mask]

    return -mean_squared_error(y, ratios)


class DecomposedRatio

Decompose a ratio of mixtures into a weighted sum of sub-ratios.

If numerator p0 and denominator p1 distributions are known to be mixtures p0(x) = \sum_i w_i p0_i(x) and p1(x) = \sum_j w_j p1_j(x), then this class can be used to decompose the ratio r(x) = p0(x) / p1(x) into the equivalent form r(x) = \sum_i w_i [\sum_j w_j r_ji(x)]^-1 where r_ji(x) are sub-ratios for p1_j(x) / p0_i(x). This usually allows for more accurate estimates.

The given numerator and denominator distributions are expected to follow the Mixture API.

class DecomposedRatio(DensityRatioMixin, BaseEstimator):
    r"""Decompose a ratio of mixtures into a weighted sum of sub-ratios.

    If numerator p0 and denominator p1 distributions are known to be
    mixtures p0(x) = \sum_i w_i p0_i(x) and p1(x) = \sum_j w_j p1_j(x),
    then this class can be used to decompose the ratio r(x) = p0(x) / p1(x)
    into the equivalent form
    r(x) = \sum_i w_i [\sum_j w_j r_ji(x)]^-1 where
    r_ji(x) are sub-ratios for p1_j(x) / p0_i(x). This usually allows
    for more accurate estimates.

    The given numerator and denominator distributions are expected to
    follow the Mixture API.
    """

    def __init__(self, base_ratio):
        """Constructor.

        Parameters
        ----------
        * base_ratio [DensityRatioMixin]:
            The base ratio to use for modeling the sub-ratios r_ji(x).
        """
        self.base_ratio = base_ratio

    def fit(self, X=None, y=None, numerator=None,
            denominator=None, n_samples=None, **kwargs):
        """Fit the decomposed density ratio estimator.

        The decomposed density ratio estimator can be fit only from
        distributions, using fit(numerator=p0, denominator=p1, n_samples=N).

        Parameters
        ----------
        * X [array-like, shape=(n_samples, n_features), optional]:
            Not supported.

        * y [array-like, shape=(n_samples,), optional]:
            Not supported.

        * numerator [Mixture, optional]:
            The numerator mixture p0.

        * denominator [Mixture, optional]:
            The denominator mixture p1.

        * n_samples [integer, optional]
            The total number of samples to draw from the numerator and
            denominator distributions.
            If numerator and denominator are made of p and q components,
            then n_samples // (p * q) samples are drawn per sub-ratio
            r_ji(x) to fit.

        Returns
        -------
        * self [object]:
            self.

        Raises
        ------
        * ValueError:
            If numerator, denominator or n_samples is missing.
        """
        self.identity_ = (numerator is not None) and (numerator is denominator)

        if self.identity_:
            return self

        if numerator is None or denominator is None or n_samples is None:
            raise ValueError(
                "numerator, denominator and n_samples must all be provided.")

        self.ratios_ = {}
        self.ratios_map_ = {}
        self.numerator_ = numerator
        self.denominator_ = denominator

        n_samples_ij = n_samples // (len(numerator.components) *
                                     len(denominator.components))

        # XXX in case of identities or inverses, samples are thrown away!
        #     we should first check whether these cases exist, and then
        #     assign n_samples to each sub-ratio

        for i, p_i in enumerate(numerator.components):
            for j, p_j in enumerate(denominator.components):
                if (p_i, p_j) in self.ratios_map_:
                    # The reciprocal sub-ratio p_i / p_j was already fit:
                    # reuse it through its inverse instead of refitting.
                    ratio = InverseRatio(
                        self.ratios_[self.ratios_map_[(p_i, p_j)]])

                else:
                    ratio = clone(self.base_ratio)
                    ratio.fit(numerator=p_j, denominator=p_i,
                              n_samples=n_samples_ij)

                self.ratios_[(j, i)] = ratio
                self.ratios_map_[(p_j, p_i)] = (j, i)

        return self

    def predict(self, X, log=False, **kwargs):
        """Predict the density ratio r(x_i) for all x_i in X.

        Parameters
        ----------
        * X [array-like, shape=(n_samples, n_features)]:
            The samples.

        * log [boolean, default=False]:
            If true, return the log-ratio log r(x) = log(p0(x)) - log(p1(x)).

        Returns
        -------
        * r [array, shape=(n_samples,)]:
            The predicted ratio r(X).
        """
        if self.identity_:
            if log:
                return np.zeros(len(X))
            return np.ones(len(X))

        w_num = self.numerator_.compute_weights(**kwargs)
        w_den = self.denominator_.compute_weights(**kwargs)

        r = np.zeros(len(X))

        for i, w_i in enumerate(w_num):
            # s accumulates sum_j w_j * r_ji(x) for the i-th numerator
            # component.
            s = np.zeros(len(X))

            for j, w_j in enumerate(w_den):
                s += w_j * self.ratios_[(j, i)].predict(X, **kwargs)

            r += w_i / s

        if log:
            return np.log(r)
        return r


def fit(self, X=None, y=None, numerator=None,
        denominator=None, n_samples=None, **kwargs):
    """Fit the decomposed density ratio estimator.

    The decomposed density ratio estimator can be fit only from
    distributions, using fit(numerator=p0, denominator=p1, n_samples=N).

    Parameters
    ----------
    * X [array-like, shape=(n_samples, n_features), optional]:
        Not supported.

    * y [array-like, shape=(n_samples,), optional]:
        Not supported.

    * numerator [Mixture, optional]:
        The numerator mixture p0.

    * denominator [Mixture, optional]:
        The denominator mixture p1.

    * n_samples [integer, optional]
        The total number of samples to draw from the numerator and
        denominator distributions.
        If numerator and denominator are made of p and q components,
        then n_samples // (p * q) samples are drawn per sub-ratio
        r_ji(x) to fit.

    Returns
    -------
    * self [object]:
        self.

    Raises
    ------
    * ValueError:
        If numerator, denominator or n_samples is missing.
    """
    self.identity_ = (numerator is not None) and (numerator is denominator)
    if self.identity_:
        return self

    if numerator is None or denominator is None or n_samples is None:
        raise ValueError(
            "numerator, denominator and n_samples must all be provided.")

    self.ratios_ = {}
    self.ratios_map_ = {}
    self.numerator_ = numerator
    self.denominator_ = denominator

    n_samples_ij = n_samples // (len(numerator.components) *
                                 len(denominator.components))

    # XXX in case of identities or inverses, samples are thrown away!
    #     we should first check whether these cases exist, and then
    #     assign n_samples to each sub-ratio

    for i, p_i in enumerate(numerator.components):
        for j, p_j in enumerate(denominator.components):
            if (p_i, p_j) in self.ratios_map_:
                # The reciprocal sub-ratio p_i / p_j was already fit:
                # reuse it through its inverse instead of refitting.
                ratio = InverseRatio(
                    self.ratios_[self.ratios_map_[(p_i, p_j)]])
            else:
                ratio = clone(self.base_ratio)
                ratio.fit(numerator=p_j, denominator=p_i,
                          n_samples=n_samples_ij)

            self.ratios_[(j, i)] = ratio
            self.ratios_map_[(p_j, p_i)] = (j, i)

    return self


def nllr(self, X, **kwargs):
    """Negative log-likelihood ratio.

    This method is a shortcut for -ratio.predict(X, log=True).sum(),
    computed over the samples whose log-ratio is finite.

    Parameters
    ----------
    * X [array-like, shape=(n_samples, n_features)]:
        The samples.

    Returns
    -------
    * nllr [float]:
        The negative log-likelihood ratio.
    """
    ratios = np.asarray(self.predict(X, log=True, **kwargs))

    # Non-finite log-ratios (inf, -inf, nan) would poison the sum, so they
    # are reported through a warning and left out.
    mask = np.isfinite(ratios)

    if mask.sum() < len(ratios):
        warnings.warn("r(X) contains non-finite values.")

    return -np.sum(ratios[mask])


def predict(self, X, log=False, **kwargs):
    """Evaluate the decomposed density ratio r(x_i) on every x_i of X.

    Parameters
    ----------
    * X [array-like, shape=(n_samples, n_features)]:
        The samples.

    * log [boolean, default=False]:
        If true, return the log-ratio log r(x) = log(p0(x)) - log(p1(x)).

    Returns
    -------
    * r [array, shape=(n_samples,)]:
        The predicted ratio r(X).
    """
    if self.identity_:
        # Identical numerator and denominator: r = 1, log r = 0.
        return np.zeros(len(X)) if log else np.ones(len(X))

    numerator_weights = self.numerator_.compute_weights(**kwargs)
    denominator_weights = self.denominator_.compute_weights(**kwargs)

    total = np.zeros(len(X))

    for i, w_i in enumerate(numerator_weights):
        # Weighted sum of the sub-ratios r_ji over the denominator
        # components, for the i-th numerator component.
        inner = np.zeros(len(X))

        for j, w_j in enumerate(denominator_weights):
            sub_ratio = self.ratios_[(j, i)]
            inner += w_j * sub_ratio.predict(X, **kwargs)

        total += w_i / inner

    return np.log(total) if log else total


def score(self, X, y, finite_only=True, **kwargs):
    """Negative MSE between predicted and known ratios.

    The higher, the better.

    Parameters
    ----------
    * X [array-like, shape=(n_samples, n_features)]:
        The samples.

    * y [array-like, shape=(n_samples,)]:
        The known ratios for X.

    * finite_only [boolean, default=True]:
        Evaluate the MSE only over finite ratios.

    Returns
    -------
    * score [float]:
        The negative MSE.
    """
    ratios = self.predict(X, **kwargs)

    if finite_only:
        # Drop the samples whose predicted ratio is inf or nan, together
        # with their reference values, before computing the error.
        ratios = np.asarray(ratios)
        mask = np.isfinite(ratios)
        y = np.asarray(y)[mask]
        ratios = ratios[mask]

    return -mean_squared_error(y, ratios)


class DensityRatioMixin

Density ratio mixin.

This class defines the common API for density ratio estimators.

class DensityRatioMixin:
    """Density ratio mixin.

    This class defines the common API for density ratio estimators.
    """

    def fit(self, X=None, y=None, numerator=None,
            denominator=None, n_samples=None, **kwargs):
        """Fit the density ratio estimator.

        The density ratio estimator r(x) = p0(x) / p1(x) can be fit either

        - from data, using fit(X, y) or
        - from distributions, using
          fit(numerator=p0, denominator=p1, n_samples=N)

        This default implementation does nothing and returns self.

        Parameters
        ----------
        * X [array-like, shape=(n_samples, n_features), optional]:
            Training data.

        * y [array-like, shape=(n_samples,), optional]:
            Labels. Samples labeled with y=0 correspond to data from the
            numerator distribution, while samples labeled with y=1
            correspond to data from the denominator distribution.

        * numerator [DistributionMixin, optional]:
            The numerator distribution p0, if X and y are not provided.

        * denominator [DistributionMixin, optional]:
            The denominator distribution p1, if X and y are not provided.

        * n_samples [integer, optional]
            The total number of samples to draw from the numerator and
            denominator distributions, if X and y are not provided.

        Returns
        -------
        * self [object]:
            self.
        """
        return self

    def predict(self, X, log=False, **kwargs):
        """Predict the density ratio r(x_i) for all x_i in X.

        Parameters
        ----------
        * X [array-like, shape=(n_samples, n_features)]:
            The samples.

        * log [boolean, default=False]:
            If true, return the log-ratio log r(x) = log(p0(x)) - log(p1(x)).

        Returns
        -------
        * r [array, shape=(n_samples,)]:
            The predicted ratio r(X).

        Raises
        ------
        * NotImplementedError:
            Always; this method must be overridden.
        """
        raise NotImplementedError

    def nllr(self, X, **kwargs):
        """Negative log-likelihood ratio.

        This method is a shortcut for -ratio.predict(X, log=True).sum(),
        computed over the samples whose log-ratio is finite.

        Parameters
        ----------
        * X [array-like, shape=(n_samples, n_features)]:
            The samples.

        Returns
        -------
        * nllr [float]:
            The negative log-likelihood ratio.
        """
        ratios = np.asarray(self.predict(X, log=True, **kwargs))

        # Non-finite log-ratios (inf, -inf, nan) would poison the sum, so
        # they are reported through a warning and left out.
        mask = np.isfinite(ratios)

        if mask.sum() < len(ratios):
            warnings.warn("r(X) contains non-finite values.")

        return -np.sum(ratios[mask])

    def score(self, X, y, finite_only=True, **kwargs):
        """Negative MSE between predicted and known ratios.

        The higher, the better.

        Parameters
        ----------
        * X [array-like, shape=(n_samples, n_features)]:
            The samples.

        * y [array-like, shape=(n_samples,)]:
            The known ratios for X.

        * finite_only [boolean, default=True]:
            Evaluate the MSE only over finite ratios.

        Returns
        -------
        * score [float]:
            The negative MSE.
        """
        ratios = self.predict(X, **kwargs)

        if finite_only:
            # Drop the samples whose predicted ratio is inf or nan,
            # together with their reference values.
            ratios = np.asarray(ratios)
            mask = np.isfinite(ratios)
            y = np.asarray(y)[mask]
            ratios = ratios[mask]

        return -mean_squared_error(y, ratios)


def fit(self, X=None, y=None, numerator=None,
        denominator=None, n_samples=None, **kwargs):
    """Fit the density ratio estimator (default: nothing to do).

    An estimator of r(x) = p0(x) / p1(x) is fit in one of two ways:

    - from data, using fit(X, y) or
    - from distributions, using
      fit(numerator=p0, denominator=p1, n_samples=N)

    This implementation ignores all of its arguments.

    Parameters
    ----------
    * X [array-like, shape=(n_samples, n_features), optional]:
        Training data.

    * y [array-like, shape=(n_samples,), optional]:
        Labels. y=0 marks samples from the numerator distribution and
        y=1 marks samples from the denominator distribution.

    * numerator [DistributionMixin, optional]:
        The numerator distribution p0, if X and y are not provided.

    * denominator [DistributionMixin, optional]:
        The denominator distribution p1, if X and y are not provided.

    * n_samples [integer, optional]
        The total number of samples to draw from the numerator and
        denominator distributions, if X and y are not provided.

    Returns
    -------
    * self [object]:
        self.
    """
    estimator = self
    return estimator


def nllr(self, X, **kwargs):
    """Negative log-likelihood ratio.

    This method is a shortcut for -ratio.predict(X, log=True).sum(),
    computed over the samples whose log-ratio is finite.

    Parameters
    ----------
    * X [array-like, shape=(n_samples, n_features)]:
        The samples.

    Returns
    -------
    * nllr [float]:
        The negative log-likelihood ratio.
    """
    ratios = np.asarray(self.predict(X, log=True, **kwargs))

    # Non-finite log-ratios (inf, -inf, nan) would poison the sum, so they
    # are reported through a warning and left out.
    mask = np.isfinite(ratios)

    if mask.sum() < len(ratios):
        warnings.warn("r(X) contains non-finite values.")

    return -np.sum(ratios[mask])


def predict(self, X, log=False, **kwargs):
    """Evaluate the density ratio r(x_i) on every sample x_i of X.

    This is the abstract entry point: it always raises.

    Parameters
    ----------
    * X [array-like, shape=(n_samples, n_features)]:
        The samples.

    * log [boolean, default=False]:
        If true, return the log-ratio log r(x) = log(p0(x)) - log(p1(x)).

    Returns
    -------
    * r [array, shape=(n_samples,)]:
        The predicted ratio r(X).

    Raises
    ------
    * NotImplementedError:
        Always.
    """
    raise NotImplementedError


def score(self, X, y, finite_only=True, **kwargs):
    """Negative MSE between predicted and known ratios.

    The higher, the better.

    Parameters
    ----------
    * X [array-like, shape=(n_samples, n_features)]:
        The samples.

    * y [array-like, shape=(n_samples,)]:
        The known ratios for X.

    * finite_only [boolean, default=True]:
        Evaluate the MSE only over finite ratios.

    Returns
    -------
    * score [float]:
        The negative MSE.
    """
    ratios = self.predict(X, **kwargs)

    if finite_only:
        # Drop the samples whose predicted ratio is inf or nan, together
        # with their reference values, before computing the error.
        ratios = np.asarray(ratios)
        mask = np.isfinite(ratios)
        y = np.asarray(y)[mask]
        ratios = ratios[mask]

    return -mean_squared_error(y, ratios)


class InverseRatio

Inverse a density ratio.

This class can be used to model the inverse 1 / r(x) = p1(x) / p0(x) of a given base ratio r(x) = p0(x) / p1(x).

class InverseRatio(DensityRatioMixin, BaseEstimator):
    """Inverse a density ratio.

    This class can be used to model the inverse 1 / r(x) = p1(x) / p0(x)
    of a given base ratio r(x) = p0(x) / p1(x).
    """

    def __init__(self, base_ratio):
        """Constructor.

        Parameters
        ----------
        * base_ratio [DensityRatioMixin]:
            The base ratio to inverse.
        """
        self.base_ratio = base_ratio

    def fit(self, X=None, y=None, numerator=None,
            denominator=None, n_samples=None, **kwargs):
        """Fit the inversed density ratio estimator.

        The inversed density ratio estimator 1 / r(x) = p1(x) / p0(x) can be
        fit either

        - from data, using fit(X, y) or
        - from distributions, using
          fit(numerator=p1, denominator=p0, n_samples=N)

        In both cases the roles are swapped before being handed to
        base_ratio, so that base_ratio models r(x) = p0(x) / p1(x) and
        predict returns its inverse.

        Note that predict only reads base_ratio, so this object does not
        need to be fit if base_ratio is already fit.

        Parameters
        ----------
        * X [array-like, shape=(n_samples, n_features), optional]:
            Training data.

        * y [array-like, shape=(n_samples,), optional]:
            Binary labels. Samples labeled with y=0 correspond to data
            from the numerator distribution p1 of this inverse ratio,
            while samples labeled with y=1 correspond to data from its
            denominator distribution p0. The labels are flipped (1 - y)
            before fitting base_ratio.

        * numerator [DistributionMixin, optional]:
            The numerator distribution p1, if X and y are not provided.

        * denominator [DistributionMixin, optional]:
            The denominator distribution p0, if X and y are not provided.

        * n_samples [integer, optional]
            The total number of samples to draw from the numerator and
            denominator distributions, if X and y are not provided.

        Returns
        -------
        * self [object]:
            self.
        """
        if X is not None and y is not None:
            # np.asarray lets plain lists of labels be flipped too.
            self.base_ratio.fit(X=X, y=1 - np.asarray(y))
        else:
            # base_ratio must model denominator / numerator, since
            # predict returns 1 / base_ratio.predict.
            self.base_ratio.fit(numerator=denominator, denominator=numerator,
                                n_samples=n_samples)

        return self

    def predict(self, X, log=False, **kwargs):
        """Predict the inverse density ratio 1 / r(x_i) for all x_i in X.

        Parameters
        ----------
        * X [array-like, shape=(n_samples, n_features)]:
            The samples.

        * log [boolean, default=False]:
            If true, return the negative log-ratio
            -log r(x) = log(p1(x)) - log(p0(x)).

        Returns
        -------
        * r [array, shape=(n_samples,)]:
            The predicted inverse ratio 1 / r(X).
        """
        if log:
            return -self.base_ratio.predict(X, log=True, **kwargs)
        return 1. / self.base_ratio.predict(X, log=False, **kwargs)


def fit(self, X=None, y=None, numerator=None,
        denominator=None, n_samples=None, **kwargs):
    """Fit the inversed density ratio estimator.

    The inversed density ratio estimator 1 / r(x) = p1(x) / p0(x) can be
    fit either

    - from data, using fit(X, y) or
    - from distributions, using
      fit(numerator=p1, denominator=p0, n_samples=N)

    In both cases the roles are swapped before being handed to
    base_ratio, so that base_ratio models r(x) = p0(x) / p1(x) and
    predict returns its inverse.

    Parameters
    ----------
    * X [array-like, shape=(n_samples, n_features), optional]:
        Training data.

    * y [array-like, shape=(n_samples,), optional]:
        Binary labels. Samples labeled with y=0 correspond to data from
        the numerator distribution p1 of this inverse ratio, while
        samples labeled with y=1 correspond to data from its denominator
        distribution p0. The labels are flipped (1 - y) before fitting
        base_ratio.

    * numerator [DistributionMixin, optional]:
        The numerator distribution p1, if X and y are not provided.

    * denominator [DistributionMixin, optional]:
        The denominator distribution p0, if X and y are not provided.

    * n_samples [integer, optional]
        The total number of samples to draw from the numerator and
        denominator distributions, if X and y are not provided.

    Returns
    -------
    * self [object]:
        self.
    """
    if X is not None and y is not None:
        # np.asarray lets plain lists of labels be flipped too.
        self.base_ratio.fit(X=X, y=1 - np.asarray(y))
    else:
        # base_ratio must model denominator / numerator, since the
        # inverse ratio is defined as 1 / base_ratio.
        self.base_ratio.fit(numerator=denominator, denominator=numerator,
                            n_samples=n_samples)

    return self


def nllr(self, X, **kwargs):
    """Negative log-likelihood ratio.

    This method is a shortcut for -ratio.predict(X, log=True).sum(),
    computed over the samples whose log-ratio is finite.

    Parameters
    ----------
    * X [array-like, shape=(n_samples, n_features)]:
        The samples.

    Returns
    -------
    * nllr [float]:
        The negative log-likelihood ratio.
    """
    ratios = np.asarray(self.predict(X, log=True, **kwargs))

    # Non-finite log-ratios (inf, -inf, nan) would poison the sum, so they
    # are reported through a warning and left out.
    mask = np.isfinite(ratios)

    if mask.sum() < len(ratios):
        warnings.warn("r(X) contains non-finite values.")

    return -np.sum(ratios[mask])


def predict(self, X, log=False, **kwargs):
    """Evaluate the inverse density ratio 1 / r(x_i) on every x_i of X.

    Parameters
    ----------
    * X [array-like, shape=(n_samples, n_features)]:
        The samples.

    * log [boolean, default=False]:
        If true, return the negative log-ratio
        -log r(x) = log(p1(x)) - log(p0(x)).

    Returns
    -------
    * r [array, shape=(n_samples,)]:
        The predicted inverse ratio 1 / r(X).
    """
    base = self.base_ratio

    if not log:
        # Plain scale: the inverse is the reciprocal of the base ratio.
        return 1. / base.predict(X, log=False, **kwargs)

    # Log scale: the inverse is the negated base log-ratio.
    return -base.predict(X, log=True, **kwargs)


def score(self, X, y, finite_only=True, **kwargs):
    """Negative MSE between predicted and known ratios.

    The higher, the better.

    Parameters
    ----------
    * X [array-like, shape=(n_samples, n_features)]:
        The samples.

    * y [array-like, shape=(n_samples,)]:
        The known ratios for X.

    * finite_only [boolean, default=True]:
        Evaluate the MSE only over finite ratios.

    Returns
    -------
    * score [float]:
        The negative MSE.
    """
    ratios = self.predict(X, **kwargs)

    if finite_only:
        # Drop the samples whose predicted ratio is inf or nan, together
        # with their reference values, before computing the error.
        ratios = np.asarray(ratios)
        mask = np.isfinite(ratios)
        y = np.asarray(y)[mask]
        ratios = ratios[mask]

    return -mean_squared_error(y, ratios)


class KnownDensityRatio

Density ratio for known densities p0 and p1.

This class cannot be used in the likelihood-free setup. It requires numerator and denominator distributions to implement the pdf and nll methods.

class KnownDensityRatio(DensityRatioMixin, BaseEstimator):
    """Density ratio computed from known densities p0 and p1.

    This class cannot be used in the likelihood-free setup: both the
    numerator and the denominator distributions must implement the pdf
    and nll methods.
    """

    def __init__(self, numerator, denominator):
        """Constructor.

        Parameters
        ----------
        * numerator [DistributionMixin]:
            The numerator distribution.
            This object is required to implement the pdf and nll methods.

        * denominator [DistributionMixin]:
            The denominator distribution.
            This object is required to implement the pdf and nll methods.
        """
        self.numerator, self.denominator = numerator, denominator

    def fit(self, X=None, y=None, numerator=None,
            denominator=None, n_samples=None, **kwargs):
        """Do nothing: the densities are known, nothing has to be learnt.

        Returns
        -------
        * self [object]:
            self.
        """
        return self

    def predict(self, X, log=False, **kwargs):
        """Evaluate the density ratio r(x_i) on every sample x_i of X.

        Parameters
        ----------
        * X [array-like, shape=(n_samples, n_features)]:
            The samples.

        * log [boolean, default=False]:
            If true, return the log-ratio log r(x) = log(p0(x)) - log(p1(x)).

        Returns
        -------
        * r [array, shape=(n_samples,)]:
            The predicted ratio r(X).
        """
        if log:
            # Difference of the two nll values, numerator negated.
            numerator_nll = self.numerator.nll(X)
            denominator_nll = self.denominator.nll(X)
            return -numerator_nll + denominator_nll

        numerator_pdf = self.numerator.pdf(X)
        denominator_pdf = self.denominator.pdf(X)
        return numerator_pdf / denominator_pdf


def __init__(self, numerator, denominator):
    """Store the two known distributions.

    Parameters
    ----------
    * numerator [DistributionMixin]:
        The numerator distribution.
        This object is required to implement the pdf and nll methods.

    * denominator [DistributionMixin]:
        The denominator distribution.
        This object is required to implement the pdf and nll methods.
    """
    self.numerator, self.denominator = numerator, denominator


def fit(self, X=None, y=None, numerator=None,
        denominator=None, n_samples=None, **kwargs):
    """Do nothing; every argument is ignored.

    Returns
    -------
    * self [object]:
        self.
    """
    estimator = self
    return estimator


def nllr(self, X, **kwargs):
    """Negative log-likelihood ratio.

    This method is a shortcut for -ratio.predict(X, log=True).sum(),
    computed over the samples whose log-ratio is finite.

    Parameters
    ----------
    * X [array-like, shape=(n_samples, n_features)]:
        The samples.

    Returns
    -------
    * nllr [float]:
        The negative log-likelihood ratio.
    """
    ratios = np.asarray(self.predict(X, log=True, **kwargs))

    # Non-finite log-ratios (inf, -inf, nan) would poison the sum, so they
    # are reported through a warning and left out.
    mask = np.isfinite(ratios)

    if mask.sum() < len(ratios):
        warnings.warn("r(X) contains non-finite values.")

    return -np.sum(ratios[mask])


def predict(self, X, log=False, **kwargs):
    """Evaluate the density ratio r(x_i) on every sample x_i of X.

    Parameters
    ----------
    * X [array-like, shape=(n_samples, n_features)]:
        The samples.

    * log [boolean, default=False]:
        If true, return the log-ratio log r(x) = log(p0(x)) - log(p1(x)).

    Returns
    -------
    * r [array, shape=(n_samples,)]:
        The predicted ratio r(X).
    """
    if log:
        # Difference of the two nll values, numerator negated.
        numerator_nll = self.numerator.nll(X)
        denominator_nll = self.denominator.nll(X)
        return -numerator_nll + denominator_nll

    numerator_pdf = self.numerator.pdf(X)
    denominator_pdf = self.denominator.pdf(X)
    return numerator_pdf / denominator_pdf


def score(self, X, y, finite_only=True, **kwargs):
"""Negative MSE between predicted and known ratios.
The higher, the better.
Parameters
----------
* X [array-like, shape=(n_samples, n_features)]:
The samples.
* y [array-like, shape=(n_samples,)]:
The known ratios for X.
* finite_only [boolean, default=True]:
Evaluate the MSE only over finite ratios.
Returns
-------
* score [float]:
The negative MSE.
"""
ratios = self.predict(X, **kwargs)
if finite_only: