from __future__ import division
import numpy as np
from .linalg import dot_inplace_right
def eigenvalue_decomposition(S, eps=10 ** -10):
    r"""
    Eigenvalue decomposition of a given covariance (or scatter) matrix.

    Parameters
    ----------
    S : ``(N, N)`` `ndarray`
        Covariance/Scatter matrix
    eps : `float`, optional
        Small value to be used for the tolerance limit computation. The final
        limit is computed as ::

            limit = np.max(np.abs(eigenvalues)) * eps

    Returns
    -------
    pos_eigenvectors : ``(N, p)`` `ndarray`
        The matrix with the eigenvectors corresponding to positive eigenvalues.
    pos_eigenvalues : ``(p,)`` `ndarray`
        The array of positive eigenvalues.
    """
    # compute eigenvalue decomposition (eigh assumes S is symmetric and
    # therefore returns real eigenvalues/eigenvectors)
    eigenvalues, eigenvectors = np.linalg.eigh(S)
    # sort eigenvalues from largest to smallest
    index = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[index]
    eigenvectors = eigenvectors[:, index]
    # set tolerance limit relative to the largest-magnitude eigenvalue
    limit = np.max(np.abs(eigenvalues)) * eps
    # select strictly positive eigenvalues (covariance matrices are PSD,
    # but numerical error can yield small negative values)
    pos_index = eigenvalues > 0.0
    pos_eigenvalues = eigenvalues[pos_index]
    pos_eigenvectors = eigenvectors[:, pos_index]
    # discard positive eigenvalues that fall below the tolerance limit,
    # i.e. that are numerically indistinguishable from zero
    index = pos_eigenvalues > limit
    pos_eigenvalues = pos_eigenvalues[index]
    pos_eigenvectors = pos_eigenvectors[:, index]
    return pos_eigenvectors, pos_eigenvalues
def principal_component_decomposition(X, whiten=False, centre=True,
                                      bias=False, inplace=False):
    r"""
    Apply Principal Component Analysis (PCA) on the data matrix `X`. In the
    case where the data matrix is very large, it is advisable to set
    ``inplace = True``. However, note this destructively edits the data matrix
    by subtracting the mean inplace.

    Parameters
    ----------
    X : ``(n_samples, n_features)`` `ndarray`
        Training data.
    whiten : `bool`, optional
        Normalise the eigenvectors to have unit magnitude.
    centre : `bool`, optional
        Whether to centre the data matrix. If ``False``, zero will be
        subtracted.
    bias : `bool`, optional
        Whether to use a biased estimate of the number of samples. If
        ``False``, subtracts ``1`` from the number of samples.
    inplace : `bool`, optional
        Whether to do the mean subtracting inplace or not. This is crucial if
        the data matrix is greater than half the available memory size.

    Returns
    -------
    eigenvectors : ``(n_components, n_features)`` `ndarray`
        The eigenvectors of the data matrix.
    eigenvalues : ``(n_components,)`` `ndarray`
        The positive eigenvalues from the data matrix.
    mean_vector : ``(n_components,)`` `ndarray`
        The mean that was subtracted from the dataset.
    """
    n_samples, n_features = X.shape
    # normalisation constant for the covariance estimate (biased vs unbiased)
    if bias:
        N = n_samples
    else:
        N = n_samples - 1.0
    if centre:
        # centre data
        mean_vector = np.mean(X, axis=0)
    else:
        mean_vector = np.zeros(n_features)
    # This is required if the data matrix is very large!
    if inplace:
        X -= mean_vector
    else:
        X = X - mean_vector
    if n_features < n_samples:
        # compute covariance matrix
        # S: n_features x n_features
        S = np.dot(X.T, X) / N
        # S should be perfectly symmetrical, but numerical error can creep
        # in. Enforce symmetry here to avoid creating complex
        # eigenvectors from eigendecomposition
        S = (S + S.T) / 2.0
        # perform eigenvalue decomposition
        # eigenvectors: n_features x n_features
        # eigenvalues: n_features
        eigenvectors, eigenvalues = eigenvalue_decomposition(S)
        if whiten:
            # whiten eigenvectors (scale each component to unit variance)
            eigenvectors *= np.sqrt(1.0 / eigenvalues)
        # transpose eigenvectors so components lie along the first axis
        # eigenvectors: n_samples x n_features
        eigenvectors = eigenvectors.T
    else:
        # n_features > n_samples: use the compact trick — eigendecompose the
        # much smaller Gram matrix and recover the feature-space eigenvectors
        # compute covariance matrix
        # S: n_samples x n_samples
        S = np.dot(X, X.T) / N
        # S should be perfectly symmetrical, but numerical error can creep
        # in. Enforce symmetry here to avoid creating complex
        # eigenvectors from eigendecomposition
        S = (S + S.T) / 2.0
        # perform eigenvalue decomposition
        # eigenvectors: n_samples x n_samples
        # eigenvalues: n_samples
        eigenvectors_s, eigenvalues = eigenvalue_decomposition(S)
        # compute final eigenvectors
        # eigenvectors: n_samples x n_features
        if whiten:
            w = (N * eigenvalues) ** -1.0
        else:
            # scale factor that renormalises the recovered eigenvectors to
            # unit magnitude
            w = np.sqrt(1.0 / (N * eigenvalues))
        dot = dot_inplace_right if inplace else np.dot
        eigenvectors = dot(eigenvectors_s.T, X)
        # whiten, and we are done.
        eigenvectors *= w[:, None]
    return eigenvectors, eigenvalues, mean_vector