# Source code for menpo.feature.optional.vlfeat

from __future__ import division
from functools import partial
import numpy as np
from menpo.base import MenpoMissingDependencyError
from ..base import winitfeature

# The dense SIFT implementation comes from the cyvlfeat package. If it cannot
# be imported, raise MenpoMissingDependencyError naming the missing package
# rather than letting the plain ImportError propagate.
try:
    from cyvlfeat.sift.dsift import dsift as cyvlfeat_dsift
except ImportError:
    raise MenpoMissingDependencyError('cyvlfeat')


@winitfeature
def dsift(pixels, window_step_horizontal=1, window_step_vertical=1,
          num_bins_horizontal=2, num_bins_vertical=2, num_or_bins=9,
          cell_size_horizontal=6, cell_size_vertical=6, fast=True,
          verbose=False):
    r"""
    Computes a 2-dimensional dense SIFT features image with ``C`` number of
    channels, where
    ``C = num_bins_horizontal * num_bins_vertical * num_or_bins``. The dense
    SIFT [2]_ implementation is taken from Vlfeat [1]_.

    Parameters
    ----------
    pixels : :map:`Image` or subclass or ``(C, Y, X)`` `ndarray`
        Either the image object itself or an array with the pixels. The first
        dimension is interpreted as channels. Only the first channel
        (``pixels[0]``) is passed to the SIFT extractor.
    window_step_horizontal : `int`, optional
        Defines the horizontal step by which the window is moved, thus it
        controls the features density. The metric unit is pixels.
    window_step_vertical : `int`, optional
        Defines the vertical step by which the window is moved, thus it
        controls the features density. The metric unit is pixels.
    num_bins_horizontal : `int`, optional
        Defines the number of histogram bins in the X direction.
    num_bins_vertical : `int`, optional
        Defines the number of histogram bins in the Y direction.
    num_or_bins : `int`, optional
        Defines the number of orientation histogram bins.
    cell_size_horizontal : `int`, optional
        Defines cell width in pixels. The cell is the region that is covered by
        a spatial bin.
    cell_size_vertical : `int`, optional
        Defines cell height in pixels. The cell is the region that is covered by
        a spatial bin.
    fast : `bool`, optional
        If ``True``, then the windowing function is a piecewise-flat, rather
        than Gaussian. While this breaks exact SIFT equivalence, in practice it
        is much faster to compute.
    verbose : `bool`, optional
        Flag to print SIFT related information.

    Raises
    ------
    ValueError
        Only 2D arrays are supported
    ValueError
        Size must only contain positive integers.
    ValueError
        Step must only contain positive integers.
    ValueError
        Window size must be a positive integer.
    ValueError
        Geometry must only contain positive integers.

    References
    ----------
    .. [1] Vedaldi, Andrea, and Brian Fulkerson. "VLFeat: An open and portable
       library of computer vision algorithms." Proceedings of the international
       conference on Multimedia. ACM, 2010.
    .. [2] Lowe, David G. "Distinctive image features from scale-invariant
       keypoints." International journal of computer vision 60.2 (2004): 91-110.
    """
    # If norm is set to True, then the centers array will have a third column
    # with descriptor norm, or energy, before contrast normalization.
    # This information can be used to suppress low contrast descriptors.
    #
    # cyvlfeat's own verbose output is always switched off here; when
    # ``verbose`` is set this function prints its own summary further down.
    centers, output = cyvlfeat_dsift(
        pixels[0], step=[window_step_vertical, window_step_horizontal],
        size=[cell_size_vertical, cell_size_horizontal], bounds=None,
        norm=False, fast=fast, float_descriptors=True,
        geometry=(num_bins_vertical, num_bins_horizontal, num_or_bins),
        verbose=False)

    # The output grid size follows from the range spanned by the centres
    # divided by the window step in each dimension. The two centre columns are
    # divided by the (vertical, horizontal) steps, i.e. they are treated as
    # (row, column) coordinates.
    # NOTE(review): the previous comment here said cyvlfeat returns x, y
    # centres, which disagrees with this ordering - confirm against cyvlfeat.
    steps = [window_step_vertical, window_step_horizontal]
    # True division yields floats, which numpy refuses as reshape dimensions.
    # The centres lie on a regular grid, so the ratio is integral up to
    # floating point error; round before casting to guard against 2.999...
    grid = np.rint((centers[-1, :] - centers[0, :]) / steps).astype(int) + 1
    n_rows, n_cols = int(grid[0]), int(grid[1])

    # print information
    if verbose:
        info_str = "Dense SIFT features:\n" \
                   "  - Input image is {}W x {}H with {} channels.\n" \
                   "  - Sampling step of ({}W, {}H).\n" \
                   "  - {}W x {}H spatial bins and {} orientation bins.\n" \
                   "  - Cell size of {}W x {}H pixels.\n".format(
                   pixels.shape[2], pixels.shape[1], pixels.shape[0],
                   window_step_horizontal, window_step_vertical,
                   num_bins_horizontal, num_bins_vertical, num_or_bins,
                   cell_size_horizontal, cell_size_vertical)
        if fast:
            info_str += "  - Fast mode is enabled.\n"
        # The channel count is the trailing (descriptor) axis of ``output``;
        # its leading axis enumerates the sampled windows, not the channels.
        info_str += "Output image size {}W x {}H x {}.".format(
            n_cols, n_rows, output.shape[-1])
        print(info_str)

    # Lay the descriptors out on the sampling grid and move the descriptor
    # axis to the front so the result is channels-first.
    features = np.rollaxis(output.reshape((n_rows, n_cols, -1)), -1)

    # The body returns a (features, centres) pair.
    # NOTE(review): the ``winitfeature`` decorator presumably consumes the
    # centres and decides what callers finally receive - confirm in ..base.
    # ``int`` replaces the ``np.int`` alias, which newer numpy no longer has.
    return (np.require(features, dtype=np.double, requirements=['C']),
            np.require(centers.reshape((n_rows, n_cols, -1)), dtype=int))


# Preset variant of dsift with a cheaper configuration: a single spatial bin
# covering a 5 x 5 pixel cell, 8 orientation bins and the fast windowing
# function. It carries its own name and shares the documentation of dsift.
fast_dsift = partial(
    dsift,
    num_bins_horizontal=1, num_bins_vertical=1, num_or_bins=8,
    cell_size_horizontal=5, cell_size_vertical=5,
    fast=True)
fast_dsift.__doc__ = dsift.__doc__
fast_dsift.__name__ = 'fast_dsift'


# Predefined dsift that returns a 128d vector
def vector_128_dsift(x, dtype=np.float32):
    r"""
    Computes a SIFT feature vector from a square patch (or image). Patch
    **must** be square and the output vector will *always* be a ``(128,)``
    vector. Please see :func:`dsift` for more information.

    The descriptor uses ``4 x 4`` spatial bins with ``8`` orientation bins
    (``4 * 4 * 8 = 128``), a cell size of ``patch_size // 4`` pixels and a
    window step equal to the patch size.

    Parameters
    ----------
    x : :map:`Image` or subclass or ``(C, Y, Y)`` `ndarray`
        Either the image object itself or an array with the pixels. The first
        dimension is interpreted as channels. Must be square i.e.
        ``height == width``. The input is never modified.
    dtype : ``np.dtype``, optional
        The dtype of the returned vector.

    Returns
    -------
    sift : `ndarray`
        The output of :func:`dsift` for the settings above, cast to ``dtype``.

    Raises
    ------
    ValueError
        Only square images are supported.
    """
    # Anything that is not already an array is expected to expose its data
    # through a ``pixels`` attribute.
    if not isinstance(x, np.ndarray):
        x = x.pixels
    if x.shape[-1] != x.shape[-2]:
        raise ValueError('This feature only works with square images '
                         'i.e. width == height')
    patch_shape = x.shape[-1]
    n_bins = 4
    c_size = patch_shape // n_bins
    if x.dtype == np.uint8:
        # Rescale 8-bit data to [0, 1]. This has to be out-of-place: an
        # in-place ``*=`` cannot store floats in a uint8 buffer (numpy rejects
        # the cast) and would also overwrite the caller's array.
        x = x * (1.0 / 255.0)
    # A step as large as the patch means the window is not moved within it.
    return dsift(x,
                 window_step_horizontal=patch_shape,
                 window_step_vertical=patch_shape,
                 num_bins_horizontal=n_bins, num_bins_vertical=n_bins,
                 cell_size_horizontal=c_size, cell_size_vertical=c_size,
                 num_or_bins=8, fast=True).astype(dtype)


# Predefined dsift that returns a 128d vector normalized by the hellinger norm
def hellinger_vector_128_dsift(x):
    r"""
    Computes a SIFT feature vector from a square patch (or image). Patch
    **must** be square and the output vector will *always* be a ``(128,)``
    vector. Please see :func:`dsift` for more information.

    The output of :func:`vector_128_dsift` is normalised using the hellinger
    norm (also called the Bhattacharyya distance) which is a measure
    designed to quantify the similarity between two probability distributions.
    Since SIFT is a histogram based feature, this has been shown to improve
    performance. Please see [1]_ for more information.

    Parameters
    ----------
    x : :map:`Image` or subclass or ``(C, Y, Y)`` `ndarray`
        Either the image object itself or an array with the pixels. The first
        dimension is interpreted as channels. Must be square i.e.
        ``height == width``.

    Returns
    -------
    sift : `ndarray`
        The element-wise square root of the output of
        :func:`vector_128_dsift` (called with its default ``dtype``) after it
        has been divided by its sum along the first axis.

    Raises
    ------
    ValueError
        Only square images are supported.

    References
    ----------
    .. [1] Arandjelovic, Relja, and Andrew Zisserman. "Three things everyone
           should know to improve object retrieval.", CVPR, 2012.
    """
    h = vector_128_dsift(x)
    # Normalise by the sum along the first axis; the small constant avoids a
    # division by zero when the descriptor is all zeros.
    h /= (h.sum(axis=0) + 1e-15)
    return np.sqrt(h)