Source code for menpo.model.pca

from __future__ import division
import numpy as np

from menpo.base import doc_inherit
from menpo.math import pca, pcacov, ipca, as_matrix
from .linear import MeanLinearModel
from .vectorizable import VectorizableBackedModel


[docs]class PCAVectorModel(MeanLinearModel): r""" A :map:`MeanLinearModel` where components are Principal Components. Principal Component Analysis (PCA) by eigenvalue decomposition of the data's scatter matrix. For details of the implementation of PCA, see :map:`pca`. Parameters ---------- samples : `ndarray` or `list` or `iterable` of `ndarray` List or iterable of numpy arrays to build the model from, or an existing data matrix. centre : `bool`, optional When ``True`` (default) PCA is performed after mean centering the data. If ``False`` the data is assumed to be centred, and the mean will be ``0``. n_samples : `int`, optional If provided then ``samples`` must be an iterator that yields ``n_samples``. If not provided then samples has to be a `list` (so we know how large the data matrix needs to be). max_n_components : `int`, optional The maximum number of components to keep in the model. Any components above and beyond this one are discarded. inplace : `bool`, optional If ``True`` the data matrix is modified in place. Otherwise, the data matrix is copied. """ def __init__(self, samples, centre=True, n_samples=None, max_n_components=None, inplace=True): # Generate data matrix data, self.n_samples = self._data_to_matrix(samples, n_samples) # Compute pca e_vectors, e_values, mean = pca(data, centre=centre, inplace=inplace) # The call to __init__ of MeanLinearModel is done in here self._constructor_helper( eigenvalues=e_values, eigenvectors=e_vectors, mean=mean, centred=centre, max_n_components=max_n_components) @classmethod
[docs] def init_from_covariance_matrix(cls, C, mean, n_samples, centred=True, max_n_components=None): r""" Build the Principal Component Analysis (PCA) by eigenvalue decomposition of the provided covariance/scatter matrix. For details of the implementation of PCA, see :map:`pcacov`. Parameters ---------- C : ``(n_features, n_features)`` `ndarray` The Covariance/Scatter matrix. mean : ``(n_features, )`` `ndarray` The mean vector. n_samples : `int` The number of samples used to generate the covariance matrix. centred : `bool`, optional When ``True`` we assume that the data were centered before computing the covariance matrix. max_n_components : `int`, optional The maximum number of components to keep in the model. Any components above and beyond this one are discarded. """ # Compute pca on covariance e_vectors, e_values = pcacov(C) # Create new pca instance model = PCAModel.__new__(cls) model.n_samples = n_samples # The call to __init__ of MeanLinearModel is done in here model._constructor_helper( eigenvalues=e_values, eigenvectors=e_vectors, mean=mean, centred=centred, max_n_components=max_n_components) return model
@classmethod
[docs] def init_from_components(cls, components, eigenvalues, mean, n_samples, centred, max_n_components=None): r""" Build the Principal Component Analysis (PCA) using the provided components (eigenvectors) and eigenvalues. Parameters ---------- components : ``(n_components, n_features)`` `ndarray` The eigenvectors to be used. eigenvalues : ``(n_components, )`` `ndarray` The corresponding eigenvalues. mean : ``(n_features, )`` `ndarray` The mean vector. n_samples : `int` The number of samples used to generate the eigenvectors. centred : `bool`, optional When ``True`` we assume that the data were centered before computing the eigenvectors. max_n_components : `int`, optional The maximum number of components to keep in the model. Any components above and beyond this one are discarded. """ # This is a bit of a filthy trick that by rights should not be done, # but we want to have these nice static constructors so we are living # with the shame (create an empty object instance which we fill in). model = PCAModel.__new__(cls) model.n_samples = n_samples # The call to __init__ of MeanLinearModel is done in here model._constructor_helper( eigenvalues=eigenvalues, eigenvectors=components, mean=mean, centred=centred, max_n_components=max_n_components) return model
def _constructor_helper(self, eigenvalues, eigenvectors, mean, centred, max_n_components): # if covariance is not centred, mean must be zeros. if centred: MeanLinearModel.__init__(self, eigenvectors, mean) else: MeanLinearModel.__init__(self, eigenvectors, np.zeros(mean.shape, dtype=mean.dtype)) self.centred = centred self._eigenvalues = eigenvalues # start the active components as all the components self._n_active_components = int(self.n_components) self._trimmed_eigenvalues = np.array([]) if max_n_components is not None: self.trim_components(max_n_components) def _data_to_matrix(self, data, n_samples): # build a data matrix from all the samples if n_samples is None: n_samples = len(data) # Assumed data is ndarray of (n_samples, n_features) or list of samples if not isinstance(data, np.ndarray): # Make sure we have an array, slice of the number of requested # samples data = np.array(data)[:n_samples] return data, n_samples @property def n_active_components(self): r""" The number of components currently in use on this model. :type: `int` """ return self._n_active_components @n_active_components.setter def n_active_components(self, value): r""" Sets an updated number of active components on this model. The number of active components represents the number of principal components that will be used for generative purposes. Note that this therefore makes the model stateful. Also note that setting the number of components will not affect memory unless :meth:`trim_components` is called. Parameters ---------- value : `int` The new number of active components. Raises ------ ValueError Tried setting n_active_components to {value} - value needs to be a float 0.0 < n_components < self._total_kept_variance_ratio ({}) or an integer 1 < n_components < self.n_components ({}) """ err_str = ("Tried setting n_active_components to {} - " "value needs to be a float " "0.0 < n_components < self._total_kept_variance_ratio " "({}) or an integer 1 < n_components < " "self.n_components ({})".format( value, self._total_variance_ratio(), self.n_components)) # check value if isinstance(value, float): if 0.0 < value <= self._total_variance_ratio(): # value needed to capture desired variance value = np.sum( [r < value for r in self._total_eigenvalues_cumulative_ratio()]) + 1 else: # variance must be bigger than 0.0 raise ValueError(err_str) if isinstance(value, int): if value < 1: # at least 1 value must be kept raise ValueError(err_str) elif value >= self.n_components: if self.n_active_components < self.n_components: # if the number of available components is smaller than # the total number of components set value to the later value = self.n_components else: # if the previous is false and value bigger than the # total number of components, do nothing return if 0 < value <= self.n_components: self._n_active_components = int(value) else: raise ValueError(err_str) @MeanLinearModel.components.getter def components(self): r""" Returns the active components of the model. :type: ``(n_active_components, n_features)`` `ndarray` """ return self._components[:self.n_active_components, :] @property def eigenvalues(self): r""" Returns the eigenvalues associated with the active components of the model, i.e. the amount of variance captured by each active component, sorted form largest to smallest. :type: ``(n_active_components,)`` `ndarray` """ return self._eigenvalues[:self.n_active_components]
[docs] def whitened_components(self): r""" Returns the active components of the model, whitened. Returns ------- whitened_components : ``(n_active_components, n_features)`` `ndarray` The whitened components. """ return self.components / ( np.sqrt(self.eigenvalues * self.n_samples + self.noise_variance())[:, None])
[docs] def original_variance(self): r""" Returns the total amount of variance captured by the original model, i.e. the amount of variance present on the original samples. Returns ------- optional_variance : `float` The variance captured by the model. """ return self._eigenvalues.sum() + self._trimmed_eigenvalues.sum()
[docs] def variance(self): r""" Returns the total amount of variance retained by the active components. Returns ------- variance : `float` Total variance captured by the active components. """ return self.eigenvalues.sum()
def _total_variance(self): r""" Returns the total amount of variance retained by all components (active and inactive). Useful when the model has been trimmed. Returns ------- total_variance : `float` Total variance captured by all components. """ return self._eigenvalues.sum()
[docs] def variance_ratio(self): r""" Returns the ratio between the amount of variance retained by the active components and the total amount of variance present on the original samples. Returns ------- variance_ratio : `float` Ratio of active components variance and total variance present in original samples. """ return self.variance() / self.original_variance()
def _total_variance_ratio(self): r""" Returns the ratio between the total amount of variance retained by all components (active and inactive) and the total amount of variance present on the original samples. Returns ------- total_variance_ratio : `float` Ratio of total variance over the original variance. """ return self._total_variance() / self.original_variance()
[docs] def eigenvalues_ratio(self): r""" Returns the ratio between the variance captured by each active component and the total amount of variance present on the original samples. Returns ------- eigenvalues_ratio : ``(n_active_components,)`` `ndarray` The active eigenvalues array scaled by the original variance. """ return self.eigenvalues / self.original_variance()
def _total_eigenvalues_ratio(self): r""" Returns the ratio between the variance captured by each active component and the total amount of variance present on the original samples. Returns ------- total_eigenvalues_ratio : ``(n_components,)`` `ndarray` Array of eigenvalues scaled by the original variance. """ return self._eigenvalues / self.original_variance()
[docs] def eigenvalues_cumulative_ratio(self): r""" Returns the cumulative ratio between the variance captured by the active components and the total amount of variance present on the original samples. Returns ------- eigenvalues_cumulative_ratio : ``(n_active_components,)`` `ndarray` Array of cumulative eigenvalues. """ return np.cumsum(self.eigenvalues_ratio())
def _total_eigenvalues_cumulative_ratio(self): r""" Returns the cumulative ratio between the variance captured by the active components and the total amount of variance present on the original samples. Returns ------- total_eigenvalues_cumulative_ratio : ``(n_active_components,)`` `ndarray` Array of total cumulative eigenvalues. """ return np.cumsum(self._total_eigenvalues_ratio())
[docs] def noise_variance(self): r""" Returns the average variance captured by the inactive components, i.e. the sample noise assumed in a Probabilistic PCA formulation. If all components are active, then ``noise_variance == 0.0``. Returns ------- noise_variance : `float` The mean variance of the inactive components. """ if self.n_active_components == self.n_components: if self._trimmed_eigenvalues.size != 0: noise_variance = self._trimmed_eigenvalues.mean() else: noise_variance = 0.0 else: noise_variance = np.hstack( (self._eigenvalues[self.n_active_components:], self._trimmed_eigenvalues)).mean() return noise_variance
[docs] def noise_variance_ratio(self): r""" Returns the ratio between the noise variance and the total amount of variance present on the original samples. Returns ------- noise_variance_ratio : `float` The ratio between the noise variance and the variance present in the original samples. """ return self.noise_variance() / self.original_variance()
[docs] def inverse_noise_variance(self): r""" Returns the inverse of the noise variance. Returns ------- inverse_noise_variance : `float` Inverse of the noise variance. Raises ------ ValueError If ``noise_variance() == 0`` """ noise_variance = self.noise_variance() if np.allclose(noise_variance, 0): raise ValueError("noise variance is effectively 0 - " "cannot take the inverse") return 1.0 / noise_variance
[docs] def component(self, index, with_mean=True, scale=1.0): r""" A particular component of the model, in vectorized form. Parameters ---------- index : `int` The component that is to be returned with_mean: `bool`, optional If ``True``, the component will be blended with the mean vector before being returned. If not, the component is returned on it's own. scale : `float`, optional A scale factor that should be applied to the component. Only valid in the case where with_mean is ``True``. The scale is applied in units of standard deviations (so a scale of ``1.0`` `with_mean` visualizes the mean plus ``1`` std. dev of the component in question). Returns ------- component_vector : ``(n_features,)`` `ndarray` The component vector of the given index. """ if with_mean: # on PCA, scale is in units of std. deviations... scaled_eigval = scale * np.sqrt(self.eigenvalues[index]) return (scaled_eigval * self.components[index]) + self._mean else: return self.components[index]
[docs] def instance_vectors(self, weights, normalized_weights=False): """ Creates new vectorized instances of the model using the first components in a particular weighting. Parameters ---------- weights : ``(n_vectors, n_weights)`` `ndarray` or `list` of `lists` The weightings for the first `n_weights` components that should be used per instance that is to be produced ``weights[i, j]`` is the linear contribution of the j'th principal component to the i'th instance vector produced. Note that if ``n_weights < n_components``, only the first ``n_weight`` components are used in the reconstruction (i.e. unspecified weights are implicitly ``0``). normalized_weights : `bool`, optional If ``True``, the weights are assumed to be normalized w.r.t the eigenvalues. This can be easier to create unique instances by making the weights more interpretable. Returns ------- vectors : ``(n_vectors, n_features)`` `ndarray` The instance vectors for the weighting provided. Raises ------ ValueError If n_weights > n_components """ weights = np.asarray(weights) # if eg a list is provided n_instances, n_weights = weights.shape if n_weights > self.n_active_components: raise ValueError( "Number of weightings cannot be greater than {}".format( self.n_active_components)) else: full_weights = np.zeros((n_instances, self.n_active_components), dtype=self._components.dtype) full_weights[..., :n_weights] = weights weights = full_weights if normalized_weights: # If the weights were normalized, then they are all relative to # to the scale of the eigenvalues and thus must be multiplied by # the sqrt of the eigenvalues. weights *= self.eigenvalues ** 0.5 return self._instance_vectors_for_full_weights(weights)
[docs] def instance(self, weights, normalized_weights=False): r""" Creates a new vector instance of the model by weighting together the components. Parameters ---------- weights : ``(n_weights,)`` `ndarray` or `list` The weightings for the first `n_weights` components that should be used. ``weights[j]`` is the linear contribution of the j'th principal component to the instance vector. normalized_weights : `bool`, optional If ``True``, the weights are assumed to be normalized w.r.t the eigenvalues. This can be easier to create unique instances by making the weights more interpretable. Returns ------- vector : ``(n_features,)`` `ndarray` The instance vector for the weighting provided. """ weights = np.asarray(weights) return self.instance_vectors( weights[None, :], normalized_weights=normalized_weights).flatten()
[docs] def trim_components(self, n_components=None): r""" Permanently trims the components down to a certain amount. The number of active components will be automatically reset to this particular value. This will reduce `self.n_components` down to `n_components` (if ``None``, `self.n_active_components` will be used), freeing up memory in the process. Once the model is trimmed, the trimmed components cannot be recovered. Parameters ---------- n_components: `int` >= ``1`` or `float` > ``0.0`` or ``None``, optional The number of components that are kept or else the amount (ratio) of variance that is kept. If ``None``, `self.n_active_components` is used. Notes ----- In case `n_components` is greater than the total number of components or greater than the amount of variance currently kept, this method does not perform any action. """ if n_components is None: # by default trim using the current n_active_components n_components = self.n_active_components # set self.n_active_components to n_components self.n_active_components = n_components if self.n_active_components < self.n_components: # Just stored so that we can fit < 80 chars nac = self.n_active_components # set self.n_components to n_components. We have to copy to ensure # that the data is actually removed, otherwise a view is returned self._components = self._components[:nac].copy() # store the eigenvalues associated to the discarded components self._trimmed_eigenvalues = np.hstack(( self._trimmed_eigenvalues, self._eigenvalues[self.n_active_components:])) # make sure that the eigenvalues are trimmed too self._eigenvalues = self._eigenvalues[:nac].copy()
[docs] def project_whitened(self, vector_instance): """ Projects the `vector_instance` onto the whitened components, retrieving the whitened linear weightings. Parameters ---------- vector_instance : ``(n_features,)`` `ndarray` A novel vector. Returns ------- projected : ``(n_features,)`` `ndarray` A vector of whitened linear weightings """ whitened_components = self.whitened_components() return np.dot(vector_instance, whitened_components.T)
[docs] def orthonormalize_against_inplace(self, linear_model): r""" Enforces that the union of this model's components and another are both mutually orthonormal. Note that the model passed in is guaranteed to not have it's number of available components changed. This model, however, may loose some dimensionality due to reaching a degenerate state. The removed components will always be trimmed from the end of components (i.e. the components which capture the least variance). If trimming is performed, `n_components` and `n_available_components` would be altered - see :meth:`trim_components` for details. Parameters ---------- linear_model : :map:`LinearModel` A second linear model to orthonormalize this against. """ # take the QR decomposition of the model components Q = (np.linalg.qr(np.hstack((linear_model._components.T, self._components.T)))[0]).T # the model passed to us went first, so all it's components will # survive. Pull them off, and update the other model. linear_model.components = Q[:linear_model.n_components, :] # it's possible that all of our components didn't survive due to # degeneracy. We need to trim our components down before replacing # them to ensure the number of components is consistent (otherwise # the components setter will complain at us) n_available_components = Q.shape[0] - linear_model.n_components if n_available_components < self.n_components: # oh dear, we've lost some components from the end of our model. if self.n_active_components < n_available_components: # save the current number of active components n_active_components = self.n_active_components else: # save the current number of available components n_active_components = n_available_components # call trim_components to update our state. self.trim_components(n_components=n_available_components) if n_active_components < n_available_components: # reset the number of active components self.n_active_components = n_active_components # now we can set our own components with the updated orthogonal ones self.components = Q[linear_model.n_components:, :]
[docs] def increment(self, data, n_samples=None, forgetting_factor=1.0, verbose=False): r""" Update the eigenvectors, eigenvalues and mean vector of this model by performing incremental PCA on the given samples. Parameters ---------- samples : `list` of :map:`Vectorizable` List of new samples to update the model from. n_samples : `int`, optional If provided then ``samples`` must be an iterator that yields ``n_samples``. If not provided then samples has to be a list (so we know how large the data matrix needs to be). forgetting_factor : ``[0.0, 1.0]`` `float`, optional Forgetting factor that weights the relative contribution of new samples vs old samples. If 1.0, all samples are weighted equally and, hence, the results is the exact same as performing batch PCA on the concatenated list of old and new simples. If <1.0, more emphasis is put on the new samples. See [1] for details. References ---------- .. [1] David Ross, Jongwoo Lim, Ruei-Sung Lin, Ming-Hsuan Yang. "Incremental Learning for Robust Visual Tracking". IJCV, 2007. """ data, n_new_samples = self._data_to_matrix(data, n_samples) # compute incremental pca e_vectors, e_values, m_vector = ipca( data, self._components, self._eigenvalues, self.n_samples, m_a=self._mean, f=forgetting_factor) # if the number of active components is the same as the total number # of components so it will be after this method is executed reset = (self.n_active_components == self.n_components) # update mean, components, eigenvalues and number of samples self._mean = m_vector self._components = e_vectors self._eigenvalues = e_values self.n_samples += n_new_samples # reset the number of active components to the total number of # components if reset: self.n_active_components = self.n_components
[docs] def plot_eigenvalues(self, figure_id=None, new_figure=False, render_lines=True, line_colour='b', line_style='-', line_width=2, render_markers=True, marker_style='o', marker_size=6, marker_face_colour='b', marker_edge_colour='k', marker_edge_width=1., render_axes=True, axes_font_name='sans-serif', axes_font_size=10, axes_font_style='normal', axes_font_weight='normal', figure_size=(10, 6), render_grid=True, grid_line_style='--', grid_line_width=0.5): r""" Plot of the eigenvalues. Parameters ---------- figure_id : `object`, optional The id of the figure to be used. new_figure : `bool`, optional If ``True``, a new figure is created. render_lines : `bool`, optional If ``True``, the line will be rendered. line_colour : See Below, optional The colour of the lines. Example options :: {``r``, ``g``, ``b``, ``c``, ``m``, ``k``, ``w``} or ``(3, )`` `ndarray` or `list` of length ``3`` line_style : {``-``, ``--``, ``-.``, ``:``}, optional The style of the lines. line_width : `float`, optional The width of the lines. render_markers : `bool`, optional If ``True``, the markers will be rendered. marker_style : See Below, optional The style of the markers. Example options :: {``.``, ``,``, ``o``, ``v``, ``^``, ``<``, ``>``, ``+``, ``x``, ``D``, ``d``, ``s``, ``p``, ``*``, ``h``, ``H``, ``1``, ``2``, ``3``, ``4``, ``8``} marker_size : `int`, optional The size of the markers in points^2. marker_face_colour : See Below, optional The face (filling) colour of the markers. Example options :: {``r``, ``g``, ``b``, ``c``, ``m``, ``k``, ``w``} or ``(3, )`` `ndarray` or `list` of length ``3`` marker_edge_colour : See Below, optional The edge colour of the markers. Example options :: {``r``, ``g``, ``b``, ``c``, ``m``, ``k``, ``w``} or ``(3, )`` `ndarray` or `list` of length ``3`` marker_edge_width : `float`, optional The width of the markers' edge. render_axes : `bool`, optional If ``True``, the axes will be rendered. axes_font_name : See Below, optional The font of the axes. Example options :: {``serif``, ``sans-serif``, ``cursive``, ``fantasy``, ``monospace``} axes_font_size : `int`, optional The font size of the axes. axes_font_style : {``normal``, ``italic``, ``oblique``}, optional The font style of the axes. axes_font_weight : See Below, optional The font weight of the axes. Example options :: {``ultralight``, ``light``, ``normal``, ``regular``, ``book``, ``medium``, ``roman``, ``semibold``, ``demibold``, ``demi``, ``bold``, ``heavy``, ``extra bold``, ``black``} figure_size : (`float`, `float`) or ``None``, optional The size of the figure in inches. render_grid : `bool`, optional If ``True``, the grid will be rendered. grid_line_style : {``-``, ``--``, ``-.``, ``:``}, optional The style of the grid lines. grid_line_width : `float`, optional The width of the grid lines. Returns ------- viewer : :map:`MatplotlibRenderer` The viewer object. """ from menpo.visualize import GraphPlotter return GraphPlotter(figure_id=figure_id, new_figure=new_figure, x_axis=range(self.n_active_components), y_axis=[self.eigenvalues], title='Eigenvalues', x_label='Component Number', y_label='Eigenvalue', x_axis_limits=(0, self.n_active_components - 1), y_axis_limits=None).render( render_lines=render_lines, line_colour=line_colour, line_style=line_style, line_width=line_width, render_markers=render_markers, marker_style=marker_style, marker_size=marker_size, marker_face_colour=marker_face_colour, marker_edge_colour=marker_edge_colour, marker_edge_width=marker_edge_width, render_legend=False, render_axes=render_axes, axes_font_name=axes_font_name, axes_font_size=axes_font_size, axes_font_style=axes_font_style, axes_font_weight=axes_font_weight, render_grid=render_grid, grid_line_style=grid_line_style, grid_line_width=grid_line_width, figure_size=figure_size)
[docs] def plot_eigenvalues_widget(self, figure_size=(10, 6), style='coloured'): r""" Plot of the eigenvalues using an interactive widget. Parameters ---------- figure_size : (`float`, `float`) or ``None``, optional The size of the figure in inches. style : {``'coloured'``, ``'minimal'``}, optional If ``'coloured'``, then the style of the widget will be coloured. If ``minimal``, then the style is simple using black and white colours. """ try: from menpowidgets import plot_graph except: from menpo.visualize.base import MenpowidgetsMissingError raise MenpowidgetsMissingError() plot_graph(x_axis=range(self.n_active_components), y_axis=[self.eigenvalues], legend_entries=['Eigenvalues'], title='Eigenvalues', x_label='Component Number', y_label='Eigenvalue', x_axis_limits=(0, self.n_active_components - 1), y_axis_limits=None, figure_size=figure_size, style=style)
[docs] def plot_eigenvalues_ratio(self, figure_id=None, new_figure=False, render_lines=True, line_colour='b', line_style='-', line_width=2, render_markers=True, marker_style='o', marker_size=6, marker_face_colour='b', marker_edge_colour='k', marker_edge_width=1., render_axes=True, axes_font_name='sans-serif', axes_font_size=10, axes_font_style='normal', axes_font_weight='normal', figure_size=(10, 6), render_grid=True, grid_line_style='--', grid_line_width=0.5): r""" Plot of the variance ratio captured by the eigenvalues. Parameters ---------- figure_id : `object`, optional The id of the figure to be used. new_figure : `bool`, optional If ``True``, a new figure is created. render_lines : `bool`, optional If ``True``, the line will be rendered. line_colour : See Below, optional The colour of the lines. Example options :: {``r``, ``g``, ``b``, ``c``, ``m``, ``k``, ``w``} or ``(3, )`` `ndarray` or `list` of length ``3`` line_style : {``-``, ``--``, ``-.``, ``:``}, optional The style of the lines. line_width : `float`, optional The width of the lines. render_markers : `bool`, optional If ``True``, the markers will be rendered. marker_style : See Below, optional The style of the markers. Example options :: {``.``, ``,``, ``o``, ``v``, ``^``, ``<``, ``>``, ``+``, ``x``, ``D``, ``d``, ``s``, ``p``, ``*``, ``h``, ``H``, ``1``, ``2``, ``3``, ``4``, ``8``} marker_size : `int`, optional The size of the markers in points^2. marker_face_colour : See Below, optional The face (filling) colour of the markers. Example options :: {``r``, ``g``, ``b``, ``c``, ``m``, ``k``, ``w``} or ``(3, )`` `ndarray` or `list` of length ``3`` marker_edge_colour : See Below, optional The edge colour of the markers. Example options :: {``r``, ``g``, ``b``, ``c``, ``m``, ``k``, ``w``} or ``(3, )`` `ndarray` or `list` of length ``3`` marker_edge_width : `float`, optional The width of the markers' edge. render_axes : `bool`, optional If ``True``, the axes will be rendered. axes_font_name : See Below, optional The font of the axes. Example options :: {``serif``, ``sans-serif``, ``cursive``, ``fantasy``, ``monospace``} axes_font_size : `int`, optional The font size of the axes. axes_font_style : {``normal``, ``italic``, ``oblique``}, optional The font style of the axes. axes_font_weight : See Below, optional The font weight of the axes. Example options :: {``ultralight``, ``light``, ``normal``, ``regular``, ``book``, ``medium``, ``roman``, ``semibold``, ``demibold``, ``demi``, ``bold``, ``heavy``, ``extra bold``, ``black``} figure_size : (`float`, `float`) or `None`, optional The size of the figure in inches. render_grid : `bool`, optional If ``True``, the grid will be rendered. grid_line_style : {``-``, ``--``, ``-.``, ``:``}, optional The style of the grid lines. grid_line_width : `float`, optional The width of the grid lines. Returns ------- viewer : :map:`MatplotlibRenderer` The viewer object. """ from menpo.visualize import GraphPlotter return GraphPlotter(figure_id=figure_id, new_figure=new_figure, x_axis=range(self.n_active_components), y_axis=[self.eigenvalues_ratio()], title='Variance Ratio of Eigenvalues', x_label='Component Number', y_label='Variance Ratio', x_axis_limits=(0, self.n_active_components - 1), y_axis_limits=None).render( render_lines=render_lines, line_colour=line_colour, line_style=line_style, line_width=line_width, render_markers=render_markers, marker_style=marker_style, marker_size=marker_size, marker_face_colour=marker_face_colour, marker_edge_colour=marker_edge_colour, marker_edge_width=marker_edge_width, render_legend=False, render_axes=render_axes, axes_font_name=axes_font_name, axes_font_size=axes_font_size, axes_font_style=axes_font_style, axes_font_weight=axes_font_weight, render_grid=render_grid, grid_line_style=grid_line_style, grid_line_width=grid_line_width, figure_size=figure_size)
[docs] def plot_eigenvalues_ratio_widget(self, figure_size=(10, 6), style='coloured'): r""" Plot of the variance ratio captured by the eigenvalues using an interactive widget. Parameters ---------- figure_size : (`float`, `float`) or ``None``, optional The size of the figure in inches. style : {``'coloured'``, ``'minimal'``}, optional If ``'coloured'``, then the style of the widget will be coloured. If ``minimal``, then the style is simple using black and white colours. """ try: from menpowidgets import plot_graph except: from menpo.visualize.base import MenpowidgetsMissingError raise MenpowidgetsMissingError() plot_graph(x_axis=range(self.n_active_components), y_axis=[self.eigenvalues_ratio()], legend_entries=['Eigenvalues ratio'], title='Variance Ratio of Eigenvalues', x_label='Component Number', y_label='Variance Ratio', x_axis_limits=(0, self.n_active_components - 1), y_axis_limits=None, figure_size=figure_size, style=style)
[docs] def plot_eigenvalues_cumulative_ratio(self, figure_id=None, new_figure=False, render_lines=True, line_colour='b', line_style='-', line_width=2, render_markers=True, marker_style='o', marker_size=6, marker_face_colour='b', marker_edge_colour='k', marker_edge_width=1., render_axes=True, axes_font_name='sans-serif', axes_font_size=10, axes_font_style='normal', axes_font_weight='normal', figure_size=(10, 6), render_grid=True, grid_line_style='--', grid_line_width=0.5): r""" Plot of the cumulative variance ratio captured by the eigenvalues. Parameters ---------- figure_id : `object`, optional The id of the figure to be used. new_figure : `bool`, optional If ``True``, a new figure is created. render_lines : `bool`, optional If ``True``, the line will be rendered. line_colour : See Below, optional The colour of the lines. Example options :: {``r``, ``g``, ``b``, ``c``, ``m``, ``k``, ``w``} or ``(3, )`` `ndarray` or `list` of length ``3`` line_style : {``-``, ``--``, ``-.``, ``:``}, optional The style of the lines. line_width : `float`, optional The width of the lines. render_markers : `bool`, optional If ``True``, the markers will be rendered. marker_style : See Below, optional The style of the markers. Example options :: {``.``, ``,``, ``o``, ``v``, ``^``, ``<``, ``>``, ``+``, ``x``, ``D``, ``d``, ``s``, ``p``, ``*``, ``h``, ``H``, ``1``, ``2``, ``3``, ``4``, ``8``} marker_size : `int`, optional The size of the markers in points^2. marker_face_colour : See Below, optional The face (filling) colour of the markers. Example options :: {``r``, ``g``, ``b``, ``c``, ``m``, ``k``, ``w``} or ``(3, )`` `ndarray` or `list` of length ``3`` marker_edge_colour : See Below, optional The edge colour of the markers. Example options :: {``r``, ``g``, ``b``, ``c``, ``m``, ``k``, ``w``} or ``(3, )`` `ndarray` or `list` of length ``3`` marker_edge_width : `float`, optional The width of the markers' edge. render_axes : `bool`, optional If ``True``, the axes will be rendered. axes_font_name : See Below, optional The font of the axes. Example options :: {``serif``, ``sans-serif``, ``cursive``, ``fantasy``, ``monospace``} axes_font_size : `int`, optional The font size of the axes. axes_font_style : {``normal``, ``italic``, ``oblique``}, optional The font style of the axes. axes_font_weight : See Below, optional The font weight of the axes. Example options :: {``ultralight``, ``light``, ``normal``, ``regular``, ``book``, ``medium``, ``roman``, ``semibold``, ``demibold``, ``demi``, ``bold``, ``heavy``, ``extra bold``, ``black``} figure_size : (`float`, `float`) or `None`, optional The size of the figure in inches. render_grid : `bool`, optional If ``True``, the grid will be rendered. grid_line_style : {``-``, ``--``, ``-.``, ``:``}, optional The style of the grid lines. grid_line_width : `float`, optional The width of the grid lines. Returns ------- viewer : :map:`MatplotlibRenderer` The viewer object. """ from menpo.visualize import GraphPlotter return GraphPlotter(figure_id=figure_id, new_figure=new_figure, x_axis=range(self.n_active_components), y_axis=[self.eigenvalues_cumulative_ratio()], title='Cumulative Variance Ratio of Eigenvalues', x_label='Component Number', y_label='Cumulative Variance Ratio', x_axis_limits=(0, self.n_active_components - 1), y_axis_limits=None).render( render_lines=render_lines, line_colour=line_colour, line_style=line_style, line_width=line_width, render_markers=render_markers, marker_style=marker_style, marker_size=marker_size, marker_face_colour=marker_face_colour, marker_edge_colour=marker_edge_colour, marker_edge_width=marker_edge_width, render_legend=False, render_axes=render_axes, axes_font_name=axes_font_name, axes_font_size=axes_font_size, axes_font_style=axes_font_style, axes_font_weight=axes_font_weight, render_grid=render_grid, grid_line_style=grid_line_style, grid_line_width=grid_line_width, figure_size=figure_size)
[docs] def plot_eigenvalues_cumulative_ratio_widget(self, figure_size=(10, 6), style='coloured'): r""" Plot of the cumulative variance ratio captured by the eigenvalues using an interactive widget. Parameters ---------- figure_size : (`float`, `float`) or ``None``, optional The size of the figure in inches. style : {``'coloured'``, ``'minimal'``}, optional If ``'coloured'``, then the style of the widget will be coloured. If ``minimal``, then the style is simple using black and white colours. """ try: from menpowidgets import plot_graph except: from menpo.visualize.base import MenpowidgetsMissingError raise MenpowidgetsMissingError() plot_graph(x_axis=range(self.n_active_components), y_axis=[self.eigenvalues_cumulative_ratio()], legend_entries=['Eigenvalues cumulative ratio'], title='Cumulative Variance Ratio of Eigenvalues', x_label='Component Number', y_label='Cumulative Variance Ratio', x_axis_limits=(0, self.n_active_components - 1), y_axis_limits=None, figure_size=figure_size, style=style)
def __str__(self): str_out = 'PCA Vector Model \n' \ ' - centred: {}\n' \ ' - # features: {}\n' \ ' - # active components: {}\n' \ ' - kept variance: {:.2} {:.1%}\n' \ ' - noise variance: {:.2} {:.1%}\n' \ ' - total # components: {}\n' \ ' - components shape: {}\n'.format( self.centred, self.n_features, self.n_active_components, self.variance(), self.variance_ratio(), self.noise_variance(), self.noise_variance_ratio(), self.n_components, self.components.shape) return str_out
[docs]class PCAModel(PCAVectorModel, VectorizableBackedModel): r""" A :map:`MeanLinearModel` where components are Principal Components and the components are vectorized instances. Principal Component Analysis (PCA) by eigenvalue decomposition of the data's scatter matrix. For details of the implementation of PCA, see :map:`pca`. Parameters ---------- samples : `list` or `iterable` of :map:`Vectorizable` List or iterable of samples to build the model from. centre : `bool`, optional When ``True`` (default) PCA is performed after mean centering the data. If ``False`` the data is assumed to be centred, and the mean will be ``0``. n_samples : `int`, optional If provided then ``samples`` must be an iterator that yields ``n_samples``. If not provided then samples has to be a `list` (so we know how large the data matrix needs to be). max_n_components : `int`, optional The maximum number of components to keep in the model. Any components above and beyond this one are discarded. inplace : `bool`, optional If ``True`` the data matrix is modified in place. Otherwise, the data matrix is copied. verbose : `bool`, optional Whether to print building information or not. """ def __init__(self, samples, centre=True, n_samples=None, max_n_components=None, inplace=True, verbose=False): # build a data matrix from all the samples data, template = as_matrix(samples, length=n_samples, return_template=True, verbose=verbose) n_samples = data.shape[0] PCAVectorModel.__init__(self, data, centre=centre, max_n_components=max_n_components, n_samples=n_samples, inplace=inplace) VectorizableBackedModel.__init__(self, template) @classmethod
[docs] def init_from_covariance_matrix(cls, C, mean, n_samples, centred=True, max_n_components=None): r""" Build the Principal Component Analysis (PCA) by eigenvalue decomposition of the provided covariance/scatter matrix. For details of the implementation of PCA, see :map:`pcacov`. Parameters ---------- C : ``(n_features, n_features)`` `ndarray` The Covariance/Scatter matrix, where `N` is the number of features. mean : :map:`Vectorizable` The mean instance. It must be a :map:`Vectorizable` and *not* an `ndarray`. n_samples : `int` The number of samples used to generate the covariance matrix. centred : `bool`, optional When ``True`` we assume that the data were centered before computing the covariance matrix. max_n_components : `int`, optional The maximum number of components to keep in the model. Any components above and beyond this one are discarded. """ # Create new pca instance self_model = PCAVectorModel.__new__(cls) self_model.n_samples = n_samples # Compute pca on covariance e_vectors, e_values = pcacov(C) # The call to __init__ of MeanLinearModel is done in here self_model._constructor_helper(eigenvalues=e_values, eigenvectors=e_vectors, mean=mean.as_vector(), centred=centred, max_n_components=max_n_components) VectorizableBackedModel.__init__(self_model, mean) return self_model
@classmethod
[docs] def init_from_components(cls, components, eigenvalues, mean, n_samples, centred, max_n_components=None): r""" Build the Principal Component Analysis (PCA) using the provided components (eigenvectors) and eigenvalues. Parameters ---------- components : ``(n_components, n_features)`` `ndarray` The eigenvectors to be used. eigenvalues : ``(n_components, )`` `ndarray` The corresponding eigenvalues. mean : :map:`Vectorizable` The mean instance. It must be a :map:`Vectorizable` and *not* an `ndarray`. n_samples : `int` The number of samples used to generate the eigenvectors. centred : `bool`, optional When ``True`` we assume that the data were centered before computing the eigenvectors. max_n_components : `int`, optional The maximum number of components to keep in the model. Any components above and beyond this one are discarded. """ # Create new pca instance self_model = PCAVectorModel.__new__(cls) self_model.n_samples = n_samples # The call to __init__ of MeanLinearModel is done in here self_model._constructor_helper( eigenvalues=eigenvalues, eigenvectors=components, mean=mean.as_vector(), centred=centred, max_n_components=max_n_components) VectorizableBackedModel.__init__(self_model, mean) return self_model
[docs] def mean(self): r""" Return the mean of the model. :type: :map:`Vectorizable` """ return self.template_instance.from_vector(self._mean)
@property def mean_vector(self): r""" Return the mean of the model as a 1D vector. :type: `ndarray` """ return self._mean @doc_inherit(name='project_out')
[docs] def project_out_vector(self, instance_vector): return PCAVectorModel.project_out(self, instance_vector)
@doc_inherit(name='reconstruct')
[docs] def reconstruct_vector(self, instance_vector): return PCAVectorModel.reconstruct(self, instance_vector)
@doc_inherit(name='project')
[docs] def project_vector(self, instance_vector): return PCAVectorModel.project(self, instance_vector)
@doc_inherit(name='instance')
[docs] def instance_vector(self, weights, normalized_weights=False): return PCAVectorModel.instance(self, weights, normalized_weights=normalized_weights)
@doc_inherit(name='component')
[docs] def component_vector(self, index, with_mean=True, scale=1.0): return PCAVectorModel.component(self, index, with_mean=with_mean, scale=scale)
@doc_inherit(name='project_whitened')
[docs] def project_whitened_vector(self, vector_instance): return PCAVectorModel.project_whitened(self, vector_instance)
[docs] def component(self, index, with_mean=True, scale=1.0): r""" Return a particular component of the linear model. Parameters ---------- index : `int` The component that is to be returned with_mean: `bool`, optional If ``True``, the component will be blended with the mean vector before being returned. If not, the component is returned on it's own. scale : `float`, optional A scale factor that should be applied to the component. Only valid in the case where ``with_mean == True``. See :meth:`component_vector` for how this scale factor is interpreted. Returns ------- component : `type(self.template_instance)` The requested component instance. """ return self.template_instance.from_vector(self.component_vector( index, with_mean=with_mean, scale=scale))
[docs] def instance(self, weights, normalized_weights=False): """ Creates a new instance of the model using the first ``len(weights)`` components. Parameters ---------- weights : ``(n_weights,)`` `ndarray` or `list` ``weights[i]`` is the linear contribution of the i'th component to the instance vector. normalized_weights : `bool`, optional If ``True``, the weights are assumed to be normalized w.r.t the eigenvalues. This can be easier to create unique instances by making the weights more interpretable. Raises ------ ValueError If n_weights > n_components Returns ------- instance : `type(self.template_instance)` An instance of the model. """ v = self.instance_vector(weights, normalized_weights=normalized_weights) return self.template_instance.from_vector(v)
[docs] def project_whitened(self, instance): """ Projects the `instance` onto the whitened components, retrieving the whitened linear weightings. Parameters ---------- instance : :map:`Vectorizable` A novel instance. Returns ------- projected : (n_components,) A vector of whitened linear weightings """ return self.project_whitened_vector(instance.as_vector())
[docs] def increment(self, samples, n_samples=None, forgetting_factor=1.0, verbose=False): r""" Update the eigenvectors, eigenvalues and mean vector of this model by performing incremental PCA on the given samples. Parameters ---------- samples : `list` of :map:`Vectorizable` List of new samples to update the model from. n_samples : `int`, optional If provided then ``samples`` must be an iterator that yields ``n_samples``. If not provided then samples has to be a list (so we know how large the data matrix needs to be). forgetting_factor : ``[0.0, 1.0]`` `float`, optional Forgetting factor that weights the relative contribution of new samples vs old samples. If 1.0, all samples are weighted equally and, hence, the results is the exact same as performing batch PCA on the concatenated list of old and new simples. If <1.0, more emphasis is put on the new samples. See [1] for details. References ---------- .. [1] David Ross, Jongwoo Lim, Ruei-Sung Lin, Ming-Hsuan Yang. "Incremental Learning for Robust Visual Tracking". IJCV, 2007. """ # build a data matrix from the new samples data = as_matrix(samples, length=n_samples, verbose=verbose) n_new_samples = data.shape[0] PCAVectorModel.increment(self, data, n_samples=n_new_samples, forgetting_factor=forgetting_factor, verbose=verbose)
def __str__(self): str_out = 'PCA Model \n' \ ' - instance class: {}\n' \ ' - centred: {}\n' \ ' - # features: {}\n' \ ' - # active components: {}\n' \ ' - kept variance: {:.2} {:.1%}\n' \ ' - noise variance: {:.2} {:.1%}\n' \ ' - total # components: {}\n' \ ' - components shape: {}\n'.format( type(self.template_instance), self.centred, self.n_features, self.n_active_components, self.variance(), self.variance_ratio(), self.noise_variance(), self.noise_variance_ratio(), self.n_components, self.components.shape) return str_out