normi

Normalized mutual information

`NormalizedMI(*, n_dims=1, normalize_method='geometric', invariant_measure='volume', k=5, n_jobs=-1, verbose=True)` ¶

Bases: BaseEstimator

Class for estimating the normalized mutual information.

Parameters:

n_dims (int, default: 1 ) –

Dimensionality of input vectors.
normalize_method (str, default: 'geometric' ) –

Determines the normalization factor for the mutual information:
- 'joint' is the joint entropy
- 'max' is the maximum of the individual entropies
- 'arithmetic' is the mean of the individual entropies
- 'geometric' is the square root of the product of the individual entropies
- 'min' is the minimum of the individual entropies
invariant_measure (str, default: 'volume' ) –

Determines the normalization used as invariant measure:
- 'radius' normalizing by mean k-nn radius
- 'volume' normalizing by mean k-nn volume
- 'kraskov' no normalization
k (int, default: 5 ) –

Number of nearest neighbors to use in \(k\)-nn estimator.
n_jobs (int, default: -1 ) –

Number of jobs to use; -1 uses as many jobs as there are cores available.
verbose (bool, default: True ) –

Setting verbose mode.

Attributes:

mi_ (ndarray of shape (n_features, n_features)) –

The pairwise mutual information matrix of the data.
nmi_ (ndarray of shape (n_features, n_features)) –

The normalized pairwise mutual information matrix of the data.
hxy_ (ndarray of shape (n_features, n_features)) –

The pairwise joint entropy matrix of the data.
hx_ (ndarray of shape (n_features, n_features)) –

The pairwise entropy matrix of the data.
hy_ (ndarray of shape (n_features, n_features)) –

The pairwise entropy matrix of the data.

Examples:

>>> import numpy as np
>>> from normi import NormalizedMI
>>> x = np.linspace(0, np.pi, 1000)
>>> data = np.array([np.cos(x), np.cos(x + np.pi / 6)]).T
>>> nmi = NormalizedMI()
>>> nmi.fit(data)
NormalizedMI()
>>> nmi.nmi_
array([[1.        , 0.79868365],
       [0.79868365, 1.        ]])

Initialize NormalizedMI class.

Source code in src/normi/_estimators.py

@beartype
def __init__(
    self,
    *,
    n_dims: PositiveInt = 1,
    normalize_method: NormString = 'geometric',
    invariant_measure: InvMeasureString = 'volume',
    k: PositiveInt = 5,
    n_jobs: Int = -1,
    verbose: bool = True,
):
    """Store the estimator settings; nothing is computed here."""
    # layout of the input and settings of the k-nn estimator
    self.n_dims = n_dims
    self.k = k
    self.invariant_measure = invariant_measure

    # normalization factor applied to the mutual information
    self.normalize_method = normalize_method

    # runtime behaviour
    self.n_jobs = n_jobs
    self.verbose = verbose

`fit(X, y=None)` ¶

Compute the normalized mutual information matrix.

Parameters:

X (ndarray of shape (n_samples, n_features x n_dims)) –

Training data.
y (Ignored, default: None ) –

Not used, present for scikit API consistency by convention.

Returns:

self ( object ) –

Fitted estimator.

Source code in src/normi/_estimators.py

@beartype
def fit(
    self,
    X: FloatMax2DArray,
    y: Optional[ArrayLikeFloat] = None,
):
    """Estimate the pairwise mutual information of `X` and normalize it.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features x n_dims)
        Training data.
    y : Ignored
        Not used, present for scikit API consistency by convention.

    Returns
    -------
    self : object
        Fitted estimator.

    """
    self._reset()
    _check_X(X=X, n_dims=self.n_dims)

    # rows are samples; each feature spans `n_dims` columns
    self._n_samples, n_columns = X.shape
    self._n_features = n_columns // self.n_dims

    # scale the input, then cut it into one block of columns per feature
    feature_blocks = np.split(
        StandardScaler().fit_transform(X), self._n_features, axis=1,
    )

    # the estimator yields, in this order: mutual information (mi_),
    # joint entropy (hxy_) and the two individual entropies (hx_, hy_)
    estimates = self._kraskov_estimator(feature_blocks)
    self.mi_, self.hxy_, self.hx_, self.hy_ = estimates

    # normalize with the method selected at construction time
    self.nmi_ = self.nmi(normalize_method=self.normalize_method)

    return self

`fit_transform(X, y=None)` ¶

Compute the normalized mutual information matrix and return it.

Parameters:

X (ndarray of shape (n_samples, n_features x n_dims)) –

Training data.
y (Ignored, default: None ) –

Not used, present for scikit API consistency by convention.

Returns:

NMI ( ndarray of shape (n_features, n_features) ) –

Pairwise normalized mutual information matrix.

Source code in src/normi/_estimators.py

@beartype
def fit_transform(
    self,
    X: FloatMax2DArray,
    y: Optional[ArrayLikeFloat] = None,
) -> NormalizedMatrix:
    """Fit the estimator on `X` and hand back the resulting NMI matrix.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features x n_dims)
        Training data.
    y : Ignored
        Not used, present for scikit API consistency by convention.

    Returns
    -------
    NMI : ndarray of shape (n_features, n_features)
        Pairwise normalized mutual information matrix.

    """
    # `fit` returns the fitted estimator itself, so its result can be
    # read off directly
    return self.fit(X).nmi_

`transform(X)` ¶

Compute the normalized mutual information matrix and return it.

Parameters:

X (ndarray of shape (n_samples, n_features x n_dims)) –

Training data.

Returns:

Similarity ( ndarray of shape (n_features, n_features) ) –

Similarity matrix.

Source code in src/normi/_estimators.py

@beartype
def transform(
    self,
    X: FloatMax2DArray,
) -> NormalizedMatrix:
    """Compute the normalized mutual information matrix and return it.

    This method only forwards to `fit_transform`, so the estimator is
    refitted on `X` with every call.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features x n_dims)
        Training data.

    Returns
    -------
    NMI : ndarray of shape (n_features, n_features)
        Pairwise normalized mutual information matrix.

    """
    # The type hints mirror `fit_transform`, which receives `X` unchanged
    # and whose return value is passed through as is.
    return self.fit_transform(X)

`nmi(normalize_method=None)` ¶

Return the normalized mutual information matrix.

Parameters:

normalize_method (str, default: None ) –

If None use class definition. Determines the normalization factor for the mutual information:
- 'joint' is the joint entropy
- 'max' is the maximum of the individual entropies
- 'arithmetic' is the mean of the individual entropies
- 'geometric' is the square root of the product of the individual entropies
- 'min' is the minimum of the individual entropies

Returns:

nmi_ ( ndarray of shape (n_features, n_features) ) –

The normalized pairwise mutual information matrix of the data.

Source code in src/normi/_estimators.py

@beartype
def nmi(
    self, normalize_method: Optional[NormString] = None,
) -> NormalizedMatrix:
    """Normalize the fitted mutual information matrix.

    Parameters
    ----------
    normalize_method : str, default=None
        Normalization factor by which the mutual information is divided.
        If `None`, the value given at construction is used.
        - `'joint'` is the joint entropy
        - `'max'` is the maximum of the individual entropies
        - `'arithmetic'` is the mean of the individual entropies
        - `'geometric'` is the square root of the product of the individual
          entropies
        - `'min'` is the minimum of the individual entropies

    Returns
    -------
    nmi_ : ndarray of shape (n_features, n_features)
        The normalized pairwise mutual information matrix of the data.

    """
    check_is_fitted(self, attributes=['mi_', 'hxy_', 'hx_', 'hy_'])

    # fall back to the method chosen at construction time
    method = (
        self.normalize_method if normalize_method is None
        else normalize_method
    )

    if method == 'joint':
        normalizer = self.hxy_
    else:
        # each reducer combines the two individual entropy matrices
        # element-wise into a single normalization matrix
        reducers = {
            'geometric': lambda hs: np.sqrt(np.prod(hs, axis=0)),
            'arithmetic': lambda hs: np.mean(hs, axis=0),
            'min': lambda hs: np.min(hs, axis=0),
            'max': lambda hs: np.max(hs, axis=0),
        }
        normalizer = reducers[method]([self.hx_, self.hy_])

    # ensure strict normalization within [0, 1]
    return np.clip(self.mi_ / normalizer, a_min=0, a_max=1)

normi

NormalizedMI(*, n_dims=1, normalize_method='geometric', invariant_measure='volume', k=5, n_jobs=-1, verbose=True) ¶

fit(X, y=None) ¶

fit_transform(X, y=None) ¶

transform(X) ¶

nmi(normalize_method=None) ¶

`NormalizedMI(*, n_dims=1, normalize_method='geometric', invariant_measure='volume', k=5, n_jobs=-1, verbose=True)` ¶

`fit(X, y=None)` ¶

`fit_transform(X, y=None)` ¶

`transform(X)` ¶

`nmi(normalize_method=None)` ¶