Source code for causationentropy.core.information.mutual_information

import warnings

import numpy as np
from scipy.spatial.distance import cdist
from scipy.special import digamma

from causationentropy.core.information.entropy import geometric_knn_entropy, kde_entropy
from causationentropy.core.linalg import correlation_log_determinant


def gaussian_mutual_information(X, Y):
    r"""
    Closed-form mutual information for jointly Gaussian variables.

    For multivariate Gaussian random variables the mutual information can be
    written in terms of covariance determinants:

    .. math::

        I(X; Y) = \frac{1}{2} \log \frac{|\Sigma_X| |\Sigma_Y|}{|\Sigma_{XY}|}

    where :math:`\Sigma_X` and :math:`\Sigma_Y` are the covariance matrices of
    X and Y, and :math:`\Sigma_{XY}` is the covariance matrix of the
    concatenated vector [X, Y].

    The three log-determinants are obtained from
    ``correlation_log_determinant`` and combined as
    ``0.5 * (logdet_X + logdet_Y - logdet_XY)``.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features_x)
        First multivariate Gaussian variable.
    Y : array-like of shape (n_samples, n_features_y)
        Second multivariate Gaussian variable. Must have the same number of
        samples as X.

    Returns
    -------
    I : float
        Mutual information in nats (natural units).

    Notes
    -----
    The estimator is exact for multivariate Gaussian data. For non-Gaussian
    data it only captures linear dependencies and may underestimate the true
    mutual information.
    """
    # Marginal terms first, then the joint term on the column-wise stack.
    logdet_x = correlation_log_determinant(X)
    logdet_y = correlation_log_determinant(Y)
    logdet_joint = correlation_log_determinant(np.hstack([X, Y]))

    return 0.5 * (logdet_x + logdet_y - logdet_joint)
def kde_mutual_information(X, Y, bandwidth="silverman", kernel="gaussian"):
    """
    Estimate mutual information from kernel-density entropy estimates.

    The estimate is assembled from three KDE entropies via

    .. math::

        I(X; Y) = H(X) + H(Y) - H(X, Y)

    where the joint entropy H(X, Y) is evaluated on the column-wise
    concatenation [X, Y]. The same ``bandwidth`` and ``kernel`` are passed to
    every ``kde_entropy`` call.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features_x)
        First variable.
    Y : array-like of shape (n_samples, n_features_y)
        Second variable.
    bandwidth : str or float, default='silverman'
        Bandwidth selection method for kernel density estimation.
    kernel : str, default='gaussian'
        Kernel function type.

    Returns
    -------
    I : float
        Estimated mutual information in nats.

    Notes
    -----
    KDE-based estimates can capture nonlinear dependencies but are sensitive
    to bandwidth selection, dimensionality and sample size. Consider k-NN
    methods for high-dimensional data or small samples.
    """
    # Shared keyword arguments so the three entropy terms are comparable.
    kde_options = {"bandwidth": bandwidth, "kernel": kernel}
    joint = np.hstack((X, Y))

    entropy_x = kde_entropy(X, **kde_options)
    entropy_y = kde_entropy(Y, **kde_options)
    entropy_joint = kde_entropy(joint, **kde_options)

    return entropy_x + entropy_y - entropy_joint
def knn_mutual_information(X, Y, metric="euclidean", k=1):
    r"""
    Estimate mutual information using k-nearest neighbor (KSG) method.

    This function implements the Kraskov-Stögbauer-Grassberger estimator,
    which uses k-nearest neighbor statistics to estimate mutual information:

    .. math::

        I(X; Y) = \psi(k) + \psi(N) - \langle \psi(n_x + 1) + \psi(n_y + 1) \rangle

    where :math:`\psi` is the digamma function, :math:`N` is the total number
    of samples, and :math:`n_x`, :math:`n_y` are the numbers of *other*
    samples lying strictly within the distance to the k-th neighbor in the
    joint space, measured in the X and Y marginal spaces respectively.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features_x) or (n_samples,)
        First variable. A 1-D array is treated as a single feature column.
    Y : array-like of shape (n_samples, n_features_y) or (n_samples,)
        Second variable. A 1-D array is treated as a single feature column.
    metric : str, default='euclidean'
        Distance metric passed to ``scipy.spatial.distance.cdist``.
    k : int, default=1
        Number of nearest neighbors to consider. Must satisfy
        ``1 <= k < n_samples``.

    Returns
    -------
    I : float
        Estimated mutual information in nats.

    Notes
    -----
    A sample is never counted as its own neighbor. This keeps the estimate
    finite when the data contain duplicated samples, in which case the k-th
    joint-space neighbor distance can be exactly zero and no other sample
    lies strictly inside that radius.

    Choice of k involves a bias-variance tradeoff:

    - Small k: lower bias, higher variance
    - Large k: higher bias, lower variance

    References
    ----------
    .. [1] Kraskov, A., Stögbauer, H., Grassberger, P. Estimating mutual
       information. Physical Review E 69, 066138 (2004).
    """
    X = np.asarray(X)
    Y = np.asarray(Y)
    # cdist only accepts 2-D input; promote a 1-D sample vector to one column.
    if X.ndim == 1:
        X = X[:, None]
    if Y.ndim == 1:
        Y = Y[:, None]

    n = X.shape[0]
    joint = np.column_stack((X, Y))

    # Each sorted row starts with the point itself (distance 0), so column k
    # holds the distance to the k-th nearest neighbor in the joint space.
    epsilon = np.sort(cdist(joint, joint, metric=metric), axis=1)[:, k]
    radius = epsilon[:, None]

    # Exclude self by masking the diagonal instead of subtracting 1 afterwards:
    # when epsilon is 0 the strict comparison already drops self, and a blind
    # "- 1" would yield a count of -1 and digamma(0), i.e. a non-finite result.
    not_self = ~np.eye(n, dtype=bool)

    dist_x = cdist(X, X, metric=metric)
    nx = np.count_nonzero((dist_x < radius) & not_self, axis=1)

    dist_y = cdist(Y, Y, metric=metric)
    ny = np.count_nonzero((dist_y < radius) & not_self, axis=1)

    # KSG estimation formula.
    mi = digamma(k) + digamma(n) - np.mean(digamma(nx + 1) + digamma(ny + 1))
    return mi
def geometric_knn_mutual_information(X, Y, metric="euclidean", k=1):
    r"""
    Estimate mutual information using geometric k-nearest neighbor method.

    This function applies the geometric k-NN entropy estimator to compute
    mutual information via the entropy decomposition:

    .. math::

        I(X; Y) = H_{\text{geom}}(X) + H_{\text{geom}}(Y) - H_{\text{geom}}(X, Y)

    Each entropy term is computed by ``geometric_knn_entropy`` from the
    samples and their pairwise distance matrix; the joint term uses the
    column-wise concatenation [X, Y].

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features_x)
        First variable.
    Y : array-like of shape (n_samples, n_features_y)
        Second variable.
    metric : str, default='euclidean'
        Distance metric passed to ``scipy.spatial.distance.cdist``.
    k : int, default=1
        Number of nearest neighbors.

    Returns
    -------
    I : float
        Estimated mutual information using the geometric k-NN method. The
        value is clipped below at 0.0, and 0.0 is returned (with a warning)
        when the raw estimate is NaN or infinite.

    Warns
    -----
    UserWarning
        If the raw estimate is not finite.

    References
    ----------
    .. [1] Lord, W.M., Sun, J., Bollt, E.M. Geometric k-nearest neighbor
       estimation of entropy and mutual information. Chaos 28, 033113 (2018).
    """
    # Build the joint sample matrix once and reuse it for both the distance
    # matrix and the entropy call.
    joint = np.hstack((X, Y))

    dist_x = cdist(X, X, metric=metric)
    dist_y = cdist(Y, Y, metric=metric)
    dist_joint = cdist(joint, joint, metric=metric)

    entropy_x = geometric_knn_entropy(X, dist_x, k)
    entropy_y = geometric_knn_entropy(Y, dist_y, k)
    entropy_joint = geometric_knn_entropy(joint, dist_joint, k)

    mi = entropy_x + entropy_y - entropy_joint

    # Safety check: fall back to 0 if the estimate is NaN or infinite.
    if not np.isfinite(mi):
        warnings.warn("NaN result in geometric_knn_mutual_information. Returning 0.0")
        return 0.0

    # Mutual information is non-negative; clip small negative estimates.
    return max(0.0, mi)