Source code for causationentropy.core.information.entropy

import warnings

import numpy as np
import scipy
from scipy.special import gamma, i0, i1
from scipy.stats import nbinom
from sklearn.neighbors import KernelDensity


[docs] def l2dist(a, b): r""" Compute the Euclidean (L2) distance between two points. .. math:: d(a, b) = ||a - b||_2 = \sqrt{\sum_{i=1}^{n} (a_i - b_i)^2} Parameters ---------- a, b : array-like Input points or vectors. Returns ------- distance : float Euclidean distance between a and b. """ return np.linalg.norm(a - b)
[docs] def hyperellipsoid_check(svd_Yi, Z_i): """ Check if points lie within a hyperellipsoid defined by SVD components. This function determines whether points in Z_i fall within the unit hyperellipsoid defined by the singular value decomposition of Yi. Parameters ---------- svd_Yi : tuple SVD decomposition (U, S, Vt) of the reference matrix. Z_i : array-like Points to test for inclusion in the hyperellipsoid. Returns ------- inside : bool True if all points lie within the hyperellipsoid, False otherwise. Notes ----- This is used in the geometric k-NN entropy estimation to assess the local geometric configuration of nearest neighbors. """ # U, S, Vt = svd_Yi # transformed = np.dot(Z_i, Vt.T) / S # return np.sum(transformed ** 2) <= 1 U, S, Vt = svd_Yi r = len(S) # local rank transformed = (Z_i @ Vt.T[:, :r]) / S return (transformed**2).sum() <= 1
[docs] def kde_entropy(X, bandwidth="silverman", kernel="gaussian"): r""" Estimate entropy using Kernel Density Estimation (KDE). This function computes the differential entropy of a continuous random variable using kernel density estimation. The entropy is defined as: .. math:: H(X) = -\int f(x) \log f(x) \, dx where :math:`f(x)` is the probability density function estimated via KDE: .. math:: \hat{f}(x) = \frac{1}{nh} \sum_{i=1}^{n} K\left(\frac{x - x_i}{h}\right) with kernel function :math:`K` and bandwidth :math:`h`. Parameters ---------- X : array-like of shape (n_samples, n_features) Input data for entropy estimation. bandwidth : str or float, default='silverman' Bandwidth selection method or explicit bandwidth value. If 'silverman', uses Silverman's rule of thumb. kernel : str, default='gaussian' Kernel function type. Options include 'gaussian', 'tophat', 'epanechnikov', 'exponential', 'linear', 'cosine'. Returns ------- H : float Estimated differential entropy in nats (natural units). Notes ----- The KDE entropy estimator can suffer from boundary effects and may be biased for small sample sizes. The choice of bandwidth critically affects the estimate: - Too small: Undersmoothed, entropy overestimated - Too large: Oversmoothed, entropy underestimated Silverman's rule provides a reasonable default bandwidth for Gaussian-like data. """ kde = KernelDensity(bandwidth=bandwidth, kernel=kernel).fit(X) log_density = np.exp(kde.score_samples(X)) Hx = -np.sum(np.log(log_density)) / len(log_density) return Hx
[docs] def geometric_knn_entropy(X, Xdist, k=1): r""" Estimate entropy using geometric k-nearest neighbor method. This function implements the geometric k-NN entropy estimator from Lord, Sun, and Bollt. The method estimates differential entropy by analyzing the geometric properties of k-nearest neighbor configurations in the data space. The entropy estimate is given by: .. math:: H(X) = \log N + \log \frac{\pi^{d/2}}{\Gamma(1 + d/2)} + \frac{d}{N} \sum_{i=1}^{N} \log \rho_i + \text{geometric correction} where :math:`N` is the sample size, :math:`d` is the dimension, :math:`\rho_i` is the distance to the k-th nearest neighbor of point :math:`i`, and the geometric correction accounts for the local geometry of the nearest neighbor configuration. Parameters ---------- X : array-like of shape (N, d) Input data matrix where N is the number of samples and d is the dimensionality. Xdist : array-like of shape (N, N) Pairwise distance matrix between all points in X. k : int, default=1 Number of nearest neighbors to consider for entropy estimation. Returns ------- H_X : float Estimated differential entropy using the geometric k-NN method. Notes ----- This estimator is particularly effective for: - High-dimensional data where traditional methods may fail - Data with non-uniform density distributions - Cases where the underlying geometry is important The geometric correction term accounts for the local dimensionality and shape of the data manifold, making this estimator more robust than standard k-NN methods. References ---------- .. [1] Lord, W.M., Sun, J., Bollt, E.M. Geometric k-nearest neighbor estimation of entropy and mutual information. Chaos 28, 033113 (2018). """ N, d = X.shape Xknn = np.zeros((N, k), dtype=int) for i in range(N): Xknn[i, :] = np.argsort(Xdist[i, :])[1 : k + 1] H_X = np.log(N) + np.log(np.pi ** (d / 2) / gamma(1 + d / 2)) # Compute distance-based term with safety checks log_distances = [] for i in range(N): dist = l2dist(X[i, :], X[Xknn[i, k - 1], :]) if dist > 1e-12: # Avoid log(0) log_distances.append(np.log(dist)) else: log_distances.append(-12.0) # log(1e-12) as a reasonable lower bound H_X += d / N * np.sum(log_distances) # Compute geometric correction term with safety checks successful_corrections = [] failed_count = 0 for i in range(N): Y_i = X[np.append([i], Xknn[i, :]), :] - np.mean( X[np.append([i], Xknn[i, :]), :], axis=0 ) Z_i = X[Xknn[i, :], :] - X[i, :] try: svd_Yi = np.linalg.svd(Y_i) sing_Yi = svd_Yi[1] # Hyperellipsoid check hyperellipsoid_sum = np.sum( [hyperellipsoid_check(svd_Yi, Z_i[j, :]) for j in range(k)] ) # Avoid log(0) in the hyperellipsoid term log_hyper = -np.log(max(1, hyperellipsoid_sum)) # Singular value ratio term with safety checks sing_ratio_sum = 0.0 if len(sing_Yi) > 0 and sing_Yi[0] > 1e-12: for l in range(min(d, len(sing_Yi))): if l < len(sing_Yi) and sing_Yi[l] > 1e-12: ratio = sing_Yi[l] / sing_Yi[0] if ratio > 1e-12: sing_ratio_sum += np.log(ratio) else: sing_ratio_sum += -12.0 # log(1e-12) correction = log_hyper + sing_ratio_sum if np.isfinite(correction): successful_corrections.append(correction) else: failed_count += 1 except (np.linalg.LinAlgError, ValueError): failed_count += 1 if failed_count > 0: warnings.warn( f"Geometric correction failed for {failed_count}/{N} points. " f"Entropy estimate may be biased." ) if successful_corrections: H_X += np.mean(successful_corrections) return H_X
[docs] def poisson_entropy(lambdas): r""" Estimate entropy for Poisson-distributed random variables. This function computes the entropy of Poisson random variables with given rate parameters. For a Poisson random variable X with parameter λ, the entropy is: .. math:: H(X) = -\sum_{k=0}^{\infty} P(X = k) \log P(X = k) where :math:`P(X = k) = \frac{\lambda^k e^{-\lambda}}{k!}`. The summation is truncated when the cumulative probability reaches a specified tolerance to ensure numerical stability. Parameters ---------- lambdas : array-like Rate parameters for the Poisson distributions. Can be scalar or array. Values are automatically converted to absolute values. Returns ------- est : float or array-like Estimated entropy values in nats. Shape matches the input lambdas. Notes ----- This implementation: - Uses adaptive truncation based on cumulative probability mass - Handles numerical stability by setting log(0) terms to zero - Returns real values even if complex arithmetic is used internally The estimator is particularly useful for count data and discrete event processes where Poisson assumptions are appropriate. References ---------- .. [1] Fish, A., Bollt, E. Interaction networks from discrete event data by Poisson multivariate mutual information estimation and information flow with applications from gene expression data. (In preparation) """ lambdas = np.abs(lambdas) First = np.exp(-lambdas) Psum = First P = [np.matrix(First)] counter = 0 small = 1 i = 1 while np.max(1 - Psum) > 1e-16 and small > 1e-75: counter = counter + 1 prob = scipy.stats.poisson.pmf(i, lambdas) Psum = Psum + prob P.append(np.matrix(prob)) if i >= np.max(lambdas): small = np.min(prob) i = i + 1 P = np.array(P).squeeze() est_a = P * np.log(P) try: est = -np.sum(est_a, axis=0) except: est = -np.sum(est_a) return np.real(est)
[docs] def poisson_joint_entropy(Cov): r""" Estimate joint entropy for multivariate Poisson distributions. This function computes the joint entropy of a multivariate Poisson distribution using the covariance matrix structure. The joint entropy decomposes into: .. math:: H(\mathbf{X}) = \sum_{i} H(X_i) + \sum_{i<j} \text{Cov}(X_i, X_j) where the first term represents marginal entropies and the second captures the interaction effects through covariances. Parameters ---------- Cov : array-like of shape (n, n) Covariance matrix of the multivariate Poisson distribution. Diagonal elements represent marginal variances (= means for Poisson). Off-diagonal elements represent covariances between variables. Returns ------- joint_entropy : float Estimated joint entropy of the multivariate Poisson distribution. Notes ----- This decomposition assumes a specific form for multivariate Poisson distributions where the interaction structure is captured through the covariance terms. The method: 1. Computes marginal entropies using diagonal elements (Poisson parameters) 2. Adds covariance contributions from off-diagonal elements This approach is computationally efficient for high-dimensional Poisson models. """ T = np.triu(Cov, 1) T = np.matrix(T) U = np.matrix(np.diag(Cov)) Ent1 = np.sum(poisson_entropy(U)) Ent2 = np.sum(T) return Ent1 + Ent2