# Source code for causationentropy.core.information.mutual_information

import warnings

import numpy as np
from scipy.spatial.distance import cdist
from scipy.special import digamma

from causationentropy.core.information.entropy import geometric_knn_entropy, kde_entropy
from causationentropy.core.linalg import correlation_log_determinant



[docs]
def gaussian_mutual_information(X, Y):
    r"""
    Closed-form mutual information between two jointly Gaussian variables.

    Under a multivariate Gaussian model the mutual information is fully
    determined by second-order statistics:

    .. math::

        I(X; Y) = \frac{1}{2} \log \frac{|\Sigma_X| |\Sigma_Y|}{|\Sigma_{XY}|}

    Here :math:`\Sigma_X` and :math:`\Sigma_Y` denote the covariance matrices
    of X and Y, while :math:`\Sigma_{XY}` is the covariance matrix of the
    stacked vector [X, Y].

    Rather than forming the determinant ratio directly, the function asks
    ``correlation_log_determinant`` for one log-determinant per block (X, Y and
    the column-wise stack of both) and combines them additively, which is the
    numerically stabler route.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features_x)
        Samples of the first Gaussian variable.
    Y : array-like of shape (n_samples, n_features_y)
        Samples of the second Gaussian variable; needs as many rows as X.

    Returns
    -------
    I : float
        Mutual information expressed in nats.

    Notes
    -----
    For truly Gaussian data this estimator is exact, which makes it a natural
    reference point when judging other mutual information estimators.

    Gaussianity means that:
    - every marginal and joint distribution is multivariate normal
    - all dependence is linear
    - moments above the covariance carry no extra information

    On non-Gaussian data only the linear part of the dependence is seen, so
    the true mutual information may be underestimated.
    """
    # Marginal terms first, then the joint term on the stacked samples.
    logdet_x, logdet_y = (correlation_log_determinant(block) for block in (X, Y))
    logdet_joint = correlation_log_determinant(np.hstack((X, Y)))

    return 0.5 * (logdet_x + logdet_y - logdet_joint)




[docs]
def kde_mutual_information(X, Y, bandwidth="silverman", kernel="gaussian"):
    """
    Mutual information from kernel-density entropy estimates.

    The estimate follows the entropy decomposition

    .. math::

        I(X; Y) = H(X) + H(Y) - H(X, Y)

    Each of the three entropies comes from ``kde_entropy`` called with the
    same ``bandwidth`` and ``kernel``. The joint term H(X, Y) is evaluated on
    the column-wise concatenation [X, Y].

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features_x)
        Samples of the first variable.
    Y : array-like of shape (n_samples, n_features_y)
        Samples of the second variable.
    bandwidth : str or float, default='silverman'
        Bandwidth selection method handed to the density estimator.
    kernel : str, default='gaussian'
        Kernel function type handed to the density estimator.

    Returns
    -------
    I : float
        Estimated mutual information in nats.

    Notes
    -----
    KDE can pick up nonlinear dependence, but the result is sensitive to:
    - the bandwidth (bias-variance tradeoff)
    - the curse of dimensionality in high-dimensional settings
    - the sample size needed for trustworthy density estimates

    For high-dimensional data or small samples a k-NN estimator is usually
    the better choice.
    """

    def _entropy(samples):
        # One place for the shared estimator settings.
        return kde_entropy(samples, bandwidth=bandwidth, kernel=kernel)

    joint = np.hstack((X, Y))

    h_x = _entropy(X)
    h_y = _entropy(Y)
    h_joint = _entropy(joint)

    return h_x + h_y - h_joint




[docs]
def knn_mutual_information(X, Y, metric="euclidean", k=1):
    r"""
    Estimate mutual information using k-nearest neighbor (KSG) method.

    This function implements the Kraskov-Stögbauer-Grassberger estimator,
    which uses k-nearest neighbor statistics to estimate mutual information:

    .. math::

        I(X; Y) = \psi(k) + \psi(N) - \langle \psi(n_x + 1) + \psi(n_y + 1) \rangle

    where :math:`\psi` is the digamma function, :math:`N` is the total number of samples,
    :math:`n_x` and :math:`n_y` are the numbers of neighbors in the marginal spaces
    strictly within the distance to the k-th neighbor in the joint space.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features_x) or (n_samples,)
        First variable. A 1-D input is treated as a single feature.
    Y : array-like of shape (n_samples, n_features_y) or (n_samples,)
        Second variable. A 1-D input is treated as a single feature.
    metric : str, default='euclidean'
        Distance metric for neighborhood calculations (passed to
        ``scipy.spatial.distance.cdist`` for the joint and both marginal spaces).
    k : int, default=1
        Number of nearest neighbors to consider. Must be smaller than the
        number of samples, because the k-th neighbor distance is read from
        column ``k`` of the sorted joint distance matrix.

    Returns
    -------
    I : float
        Estimated mutual information in nats.

    Notes
    -----
    The KSG estimator:

    - Is asymptotically consistent
    - Adapts to local density variations
    - Works well for continuous data
    - Can handle moderate dimensionality

    Choice of k involves bias-variance tradeoff:
    - Small k: Lower bias, higher variance
    - Large k: Higher bias, lower variance

    The reference formulation measures joint-space distances with the maximum
    norm; pass ``metric='chebyshev'`` to follow it. The default here is the
    Euclidean metric.

    When samples are duplicated the k-th joint neighbor can lie at distance
    zero. No point is then strictly inside the neighborhood, so the marginal
    counts are clamped at zero instead of becoming negative (which would feed
    :math:`\psi(0)` into the average and make the result non-finite).

    References
    ----------
    .. [1] Kraskov, A., Stögbauer, H., Grassberger, P. Estimating mutual information.
           Physical Review E 69, 066138 (2004).
    """
    # Accept any array-like (lists included) and promote 1-D sample vectors to
    # single-feature columns, since cdist only works on 2-D inputs.
    X = np.asarray(X)
    Y = np.asarray(Y)
    if X.ndim == 1:
        X = X[:, None]
    if Y.ndim == 1:
        Y = Y[:, None]

    n = X.shape[0]
    joint = np.column_stack((X, Y))

    # Each sorted row starts with the zero self-distance, so column k holds the
    # distance to the k-th nearest neighbor in the joint space.
    epsilon = np.sort(cdist(joint, joint, metric=metric), axis=1)[:, k]
    radius = epsilon[:, None]

    # Count marginal neighbors strictly inside the radius. The "- 1" removes the
    # point itself; the clamp covers epsilon == 0, where the point is not counted
    # in the first place.
    nx = np.maximum(np.sum(cdist(X, X, metric=metric) < radius, axis=1) - 1, 0)
    ny = np.maximum(np.sum(cdist(Y, Y, metric=metric) < radius, axis=1) - 1, 0)

    # KSG estimation formula
    mi = digamma(k) + digamma(n) - np.mean(digamma(nx + 1) + digamma(ny + 1))
    return mi




[docs]
def geometric_knn_mutual_information(X, Y, metric="euclidean", k=1):
    r"""
    Estimate mutual information using geometric k-nearest neighbor method.

    This function applies the geometric k-NN entropy estimator to compute
    mutual information via the entropy decomposition:

    .. math::

        I(X; Y) = H_{\text{geom}}(X) + H_{\text{geom}}(Y) - H_{\text{geom}}(X, Y)

    The geometric correction accounts for local manifold structure and
    provides improved estimates for data with non-uniform density distributions.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features_x)
        First variable.
    Y : array-like of shape (n_samples, n_features_y)
        Second variable.
    metric : str, default='euclidean'
        Distance metric for neighbor calculations.
    k : int, default=1
        Number of nearest neighbors.

    Returns
    -------
    I : float
        Estimated mutual information using geometric k-NN method. The value
        is clipped from below at 0.0; if the raw estimate is NaN or infinite,
        a warning is issued and 0.0 is returned.

    Warns
    -----
    UserWarning
        When the combined entropy estimate is not finite.

    Notes
    -----
    This estimator is particularly effective for:
    - Data lying on lower-dimensional manifolds
    - Non-uniform density distributions
    - Cases where local geometry matters

    The geometric correction helps account for the intrinsic dimensionality
    of the data, potentially providing more accurate estimates than standard k-NN methods.

    References
    ----------
    .. [1] Lord, W.M., Sun, J., Bollt, E.M. Geometric k-nearest neighbor estimation of
           entropy and mutual information. Chaos 28, 033113 (2018).
    """
    # Build the joint sample matrix once; it is needed for both the joint
    # distance matrix and the joint entropy call.
    XY = np.hstack((X, Y))

    # Pairwise distances in each marginal space and in the joint space.
    Xdist = cdist(X, X, metric=metric)
    Ydist = cdist(Y, Y, metric=metric)
    XYdist = cdist(XY, XY, metric=metric)

    HX = geometric_knn_entropy(X, Xdist, k)
    HY = geometric_knn_entropy(Y, Ydist, k)
    HXY = geometric_knn_entropy(XY, XYdist, k)

    mi = HX + HY - HXY

    # Safety check: fall back to 0 if the estimate is NaN or infinite.
    if not np.isfinite(mi):
        # stacklevel=2 attributes the warning to the caller's line.
        warnings.warn(
            "NaN result in geometric_knn_mutual_information. Returning 0.0",
            stacklevel=2,
        )
        return 0.0

    # Mutual information is non-negative; clip small negative estimation noise.
    return max(0.0, mi)