Source code for menpo.io.input.base

import os
from pathlib import Path
import random
from ..utils import _norm_path
from menpo.base import menpo_src_dir_path
from menpo.visualize import print_progress


def data_dir_path():
    r"""
    Locate the ``./data`` folder that is built in to Menpo on this machine.

    The folder is resolved relative to the Menpo source directory, as
    reported by ``menpo_src_dir_path()``.

    Returns
    -------
    ``pathlib.Path``
        The path to the local Menpo ./data folder
    """
    menpo_root = menpo_src_dir_path()
    return menpo_root / 'data'
def data_path_to(asset_filename):
    r"""
    Resolve the path of a builtin asset inside the ./data folder on this
    machine.

    Parameters
    ----------
    asset_filename : `str`
        The filename (with extension) of a file builtin to Menpo. The full
        set of allowed names is given by :func:`ls_builtin_assets()`

    Returns
    -------
    data_path : `pathlib.Path`
        The path to a given asset in the ./data folder

    Raises
    ------
    ValueError
        If the asset_filename doesn't exist in the `data` folder. The
        message lists every builtin asset that is available.
    """
    candidate = data_dir_path() / asset_filename
    if candidate.is_file():
        return candidate
    # Only enumerate the data folder when we actually need the error message.
    message = "{} is not a builtin asset: {}".format(asset_filename,
                                                     ls_builtin_assets())
    raise ValueError(message)
def same_name(asset):
    r"""
    Menpo's default landmark resolver. Returns all landmarks found to have
    the same stem as the asset.

    Parameters
    ----------
    asset : `object`
        An imported asset carrying a ``path`` attribute (a path object
        supporting ``with_suffix``).

    Returns
    -------
    landmark_paths : `dict`
        Mapping from the upper-cased landmark file extension (without the
        leading ``'.'``) to the path of the landmark file.
    """
    # Glob pattern matching every file that shares the asset's stem.
    wildcard = asset.path.with_suffix('.*')
    resolved = {}
    for lm_path in landmark_file_paths(wildcard):
        # Key is the extension without its leading '.', upper-cased.
        resolved[lm_path.suffix[1:].upper()] = lm_path
    return resolved
def import_image(filepath, landmark_resolver=same_name, normalise=True):
    r"""Single image (and associated landmarks) importer.

    If an image file is found at `filepath`, returns an :map:`Image` or
    subclass representing it. By default, landmark files sharing the same
    filename stem will be imported and attached with a group name based on
    the extension of the landmark file, although this behavior can be
    customised (see `landmark_resolver`). If the image defines a mask,
    this mask will be imported.

    Parameters
    ----------
    filepath : `pathlib.Path` or `str`
        A relative or absolute filepath to an image file.
    landmark_resolver : `function`, optional
        This function will be used to find landmarks for the
        image. The function should take one argument (the image itself) and
        return a dictionary of the form ``{'group_name': 'landmark_filepath'}``
        Default finds landmarks with the same name as the image file.
    normalise : `bool`, optional
        If ``True``, normalise the image pixels between 0 and 1 and convert
        to floating point. If false, the native datatype of the image will be
        maintained (commonly `uint8`). Note that in general Menpo assumes
        :map:`Image` instances contain floating point data - if you disable
        this flag you will have to manually convert the images you import to
        floating point before doing most Menpo operations. This however can be
        useful to save on memory usage if you only wish to view or crop images.

    Returns
    -------
    images : :map:`Image` or list of
        An instantiated :map:`Image` or subclass thereof or a list of images.
    """
    # ``normalise`` is forwarded to the image importer's constructor.
    return _import(filepath, image_types,
                   landmark_ext_map=image_landmark_types,
                   landmark_resolver=landmark_resolver,
                   importer_kwargs=dict(normalise=normalise))
def import_landmark_file(filepath, asset=None):
    r"""Single landmark group importer.

    If a landmark file is found at ``filepath``, returns a
    :map:`LandmarkGroup` representing it.

    Parameters
    ----------
    filepath : `pathlib.Path` or `str`
        A relative or absolute filepath to a landmark file.
    asset : `object`, optional
        If not ``None``, forwarded to the landmark importer's ``build``
        method as the ``asset`` keyword argument.

    Returns
    -------
    landmark_group : :map:`LandmarkGroup`
        The :map:`LandmarkGroup` that the file format represents.
    """
    landmark_group = _import(filepath, image_landmark_types, asset=asset)
    return landmark_group
def import_pickle(filepath):
    r"""Import a pickle file of arbitrary Python objects.

    Menpo unambiguously uses ``.pkl`` as its choice of extension for Pickle
    files. Menpo also supports automatic importing and exporting of gzip
    compressed pickle files - just choose a ``filepath`` ending ``pkl.gz`` and
    gzip compression will automatically be applied. Compression can massively
    reduce the filesize of a pickle file at the cost of longer import and
    export times.

    Parameters
    ----------
    filepath : `pathlib.Path` or `str`
        A relative or absolute filepath to a ``.pkl`` or ``.pkl.gz`` file.

    Returns
    -------
    object : `object`
        Whatever Python objects are present in the Pickle file

    Notes
    -----
    NOTE(review): presumably the registered importer relies on Python's
    ``pickle`` module, which can execute arbitrary code while loading -
    only import pickle files from sources you trust.
    """
    unpickled = _import(filepath, pickle_types)
    return unpickled
def import_images(pattern, max_images=None, shuffle=False,
                  landmark_resolver=same_name, normalise=True, verbose=False):
    r"""Multiple image (and associated landmarks) importer.

    For each image found yields an :map:`Image` or
    subclass representing it. By default, landmark files sharing the same
    filename stem will be imported and attached with a group name based on the
    extension of the landmark file, although this behavior can be customised
    (see `landmark_resolver`). If the image defines a mask, this mask will be
    imported.

    Note that this is a generator function. This allows for pre-processing
    of data to take place as data is imported (e.g. cropping images to
    landmarks as they are imported for memory efficiency).

    Parameters
    ----------
    pattern : `str`
        A glob path pattern to search for images. Every image found to match
        the glob will be imported one by one. See :map:`image_paths` for more
        details of what images will be found.
    max_images : positive `int`, optional
        If not ``None``, only import the first ``max_images`` found. Else,
        import all.
    shuffle : `bool`, optional
        If ``True``, the order of the returned images will be randomised. If
        ``False``, the order of the returned images will be alphanumerically
        ordered.
    landmark_resolver : `function`, optional
        This function will be used to find landmarks for the
        image. The function should take one argument (the image itself) and
        return a dictionary of the form ``{'group_name': 'landmark_filepath'}``
        Default finds landmarks with the same name as the image file.
    normalise : `bool`, optional
        If ``True``, normalise the image pixels between 0 and 1 and convert
        to floating point. If false, the native datatype of the image will be
        maintained (commonly `uint8`). Note that in general Menpo assumes
        :map:`Image` instances contain floating point data - if you disable
        this flag you will have to manually convert the images you import to
        floating point before doing most Menpo operations. This however can be
        useful to save on memory usage if you only wish to view or crop images.
    verbose : `bool`, optional
        If ``True`` progress of the importing will be dynamically reported with
        a progress bar.

    Returns
    -------
    generator : `generator` yielding :map:`Image` or list of
        Generator yielding :map:`Image` instances found to match the glob
        pattern provided.

    Raises
    ------
    ValueError
        If no images are found at the provided glob. Because this is a
        generator function, the error surfaces when iteration starts.

    Examples
    --------
    Import images at 20% scale from a huge collection:

    >>> images = []
    >>> for img in menpo.io.import_images('./massive_image_db/*'):
    >>>    # rescale to a sensible size as we go
    >>>    images.append(img.rescale(0.2))
    """
    importer_options = dict(normalise=normalise)
    image_generator = _import_glob_generator(
        pattern, image_types, max_assets=max_images, shuffle=shuffle,
        landmark_resolver=landmark_resolver,
        landmark_ext_map=image_landmark_types, verbose=verbose,
        importer_kwargs=importer_options)
    for image in image_generator:
        yield image
def import_landmark_files(pattern, max_landmarks=None, shuffle=False,
                          verbose=False):
    r"""Multiple landmark file import generator.

    Note that this is a generator function.

    Parameters
    ----------
    pattern : `str`
        A glob path pattern to search for landmark files. Every
        landmark file found to match the glob will be imported one by one.
        See :map:`landmark_file_paths` for more details of what landmark files
        will be found.
    max_landmarks : positive `int`, optional
        If not ``None``, only import the first ``max_landmarks`` found.
        Else, import all.
    shuffle : `bool`, optional
        If ``True``, the order of the returned landmark files will be
        randomised. If ``False``, the order of the returned landmark files will
        be alphanumerically ordered.
    verbose : `bool`, optional
        If ``True`` progress of the importing will be dynamically reported.

    Returns
    -------
    generator : `generator` yielding :map:`LandmarkGroup`
        Generator yielding :map:`LandmarkGroup` instances found to match the
        glob pattern provided.

    Raises
    ------
    ValueError
        If no landmarks are found at the provided glob. Because this is a
        generator function, the error surfaces when iteration starts.
    """
    landmark_generator = _import_glob_generator(
        pattern, image_landmark_types, max_assets=max_landmarks,
        shuffle=shuffle, verbose=verbose)
    for landmark_group in landmark_generator:
        yield landmark_group
def import_pickles(pattern, max_pickles=None, shuffle=False, verbose=False):
    r"""Multiple pickle file import generator.

    Note that this is a generator function.

    Menpo unambiguously uses ``.pkl`` as its choice of extension for pickle
    files. Menpo also supports automatic importing of gzip compressed pickle
    files - matching files with extension ``pkl.gz`` will be automatically
    un-gzipped and imported.

    Parameters
    ----------
    pattern : `str`
        The glob path pattern to search for pickles. Every pickle file found
        to match the glob will be imported one by one.
    max_pickles : positive `int`, optional
        If not ``None``, only import the first ``max_pickles`` found. Else,
        import all.
    shuffle : `bool`, optional
        If ``True``, the order of the returned pickles will be randomised. If
        ``False``, the order of the returned pickles will be alphanumerically
        ordered.
    verbose : `bool`, optional
        If ``True`` progress of the importing will be dynamically reported.

    Returns
    -------
    generator : generator yielding `object`
        Generator yielding whatever Python object is present in the pickle
        files that match the glob pattern provided.

    Raises
    ------
    ValueError
        If no pickles are found at the provided glob. Because this is a
        generator function, the error surfaces when iteration starts.
    """
    pickle_generator = _import_glob_generator(
        pattern, pickle_types, max_assets=max_pickles, shuffle=shuffle,
        verbose=verbose)
    for unpickled in pickle_generator:
        yield unpickled
def _import_builtin_asset(asset_name, **kwargs):
    r"""Single builtin asset (landmark or image) importer.

    Imports the relevant builtin asset from the ``./data`` directory that
    ships with Menpo.

    Parameters
    ----------
    asset_name : `str`
        The filename of a builtin asset (see :map:`ls_builtin_assets`
        for allowed values)
    **kwargs : `dict`, optional
        Forwarded to the importer as its ``importer_kwargs``.

    Returns
    -------
    asset :
        An instantiated :map:`Image` or :map:`LandmarkGroup` asset.
    """
    builtin_path = data_path_to(asset_name)
    # A builtin asset is either an image or a landmark file. Attempt the
    # image import first; a ValueError from that attempt triggers the
    # landmark import instead.
    try:
        return _import(builtin_path, image_types,
                       landmark_ext_map=image_landmark_types,
                       importer_kwargs=kwargs)
    except ValueError:
        return _import(builtin_path, image_landmark_types,
                       importer_kwargs=kwargs)
def ls_builtin_assets():
    r"""List all the builtin asset examples provided in Menpo.

    Only the top level of the data directory is inspected, and
    sub-directories are skipped.

    Returns
    -------
    list of strings
        Filenames of all assets in the data directory shipped with Menpo
    """
    filenames = []
    for entry in data_dir_path().glob('*'):
        if entry.is_dir():
            continue
        filenames.append(entry.name)
    return filenames
def import_builtin(x):
    # Build a closure that imports the builtin asset named ``x``. Any keyword
    # arguments given to the closure are forwarded to _import_builtin_asset.
    def execute(**kwargs):
        return _import_builtin_asset(x, **kwargs)
    return execute


class BuiltinAssets(object):
    # Callable namespace for builtin assets: calling the instance with an
    # asset filename imports that asset. Per-asset shortcut attributes are
    # attached to the single module-level instance below.

    def __call__(self, asset_name, **kwargs):
        return _import_builtin_asset(asset_name, **kwargs)


# Public entry point: ``import_builtin_asset('<filename>')``.
import_builtin_asset = BuiltinAssets()

# Register one shortcut attribute per builtin asset found at import time.
# The attribute name is the filename with every '.' replaced by '_', so the
# shortcut is a valid attribute access.
for asset in ls_builtin_assets():
    setattr(import_builtin_asset, asset.replace('.', '_'),
            import_builtin(asset))
def image_paths(pattern):
    r"""
    Return image filepaths that Menpo can import that match the glob pattern.

    Parameters
    ----------
    pattern : `str`
        A glob path pattern to search for images.

    Returns
    -------
    paths : `generator`
        The generator produced by ``glob_with_suffix``, restricted to files
        whose extension has a registered image importer.
    """
    matching_paths = glob_with_suffix(pattern, image_types)
    return matching_paths
def landmark_file_paths(pattern):
    r"""
    Return landmark file filepaths that Menpo can import that match the glob
    pattern.

    Parameters
    ----------
    pattern : `str`
        A glob path pattern to search for landmark files.

    Returns
    -------
    paths : `generator`
        The generator produced by ``glob_with_suffix``, restricted to files
        whose extension has a registered landmark importer.
    """
    matching_paths = glob_with_suffix(pattern, image_landmark_types)
    return matching_paths
def _import_glob_generator(pattern, extension_map, max_assets=None,
                           landmark_resolver=same_name, shuffle=False,
                           landmark_ext_map=None, importer_kwargs=None,
                           verbose=False):
    r"""
    Build a generator importing every asset matching a glob pattern.

    Parameters
    ----------
    pattern : `str`
        Glob pattern selecting the files to import.
    extension_map : dictionary (String, :class:`menpo.io.base.Importer`)
        A map from extensions (with leading period) to importer classes.
    max_assets : positive `int`, optional
        If given (and non-zero), only the first ``max_assets`` filepaths are
        imported.
    landmark_resolver : `function`, optional
        Forwarded to :func:`_import` to locate landmarks for each asset.
    shuffle : `bool`, optional
        If ``True`` the filepaths are randomly shuffled (and the glob is not
        sorted first). If ``False`` the filepaths are sorted.
    landmark_ext_map : dictionary (str, :map:`Importer`), optional
        Forwarded to :func:`_import`; ``None`` disables landmark importing.
    importer_kwargs : `dict`, optional
        kwargs supplied to each importer if not ``None``.
    verbose : `bool`, optional
        If ``True`` the generator is wrapped with ``print_progress``.

    Returns
    -------
    generator : `generator`
        Generator yielding the imported assets.

    Raises
    ------
    ValueError
        If no file matches the glob. This is raised eagerly, when this
        function is called.
    """
    filepaths = list(glob_with_suffix(pattern, extension_map,
                                      sort=(not shuffle)))
    if shuffle:
        random.shuffle(filepaths)
    if max_assets:
        filepaths = filepaths[:max_assets]
    n_files = len(filepaths)
    if n_files == 0:
        raise ValueError('The glob {} yields no assets'.format(pattern))
    generator = _multi_import_generator(
        filepaths, extension_map, landmark_resolver=landmark_resolver,
        landmark_ext_map=landmark_ext_map, importer_kwargs=importer_kwargs)
    if verbose:
        # wrap the generator with the progress reporter
        generator = print_progress(generator, prefix='Importing assets',
                                   n_items=n_files)
    return generator


def _import(filepath, extensions_map, keep_importer=False,
            landmark_resolver=same_name, landmark_ext_map=None,
            asset=None, importer_kwargs=None):
    r"""
    Creates an importer for the filepath passed in, and then calls build on
    it, returning a list of assets or a single asset, depending on the
    file type.

    The type of assets returned are specified by the `extensions_map`.

    Parameters
    ----------
    filepath : string
        The filepath to import
    extensions_map : dictionary (String, :class:`menpo.io.base.Importer`)
        A map from extensions to importers. The importers are expected to be
        non-instantiated classes. The extensions are expected to
        contain the leading period eg. `.obj`.
    keep_importer : bool, optional
        If `True`, return the :class:`menpo.io.base.Importer` as well as the
        built asset(s).
    landmark_resolver : function, optional
        If not None, this function will be used to find landmarks for each
        asset. The function should take one argument (the asset itself) and
        return a dictionary of the form {'group_name': 'landmark_filepath'},
        or None if the asset has no landmarks. If None, no landmarks are
        attached.
    landmark_ext_map : dictionary (str, :map:`Importer`), optional
        If not None an attempt will be made to import annotations with
        extensions defined in this mapping. If None, no attempt will be
        made to import annotations.
    asset : object, optional
        If not None, the asset will be passed to the importer's build method
        as the asset kwarg
    importer_kwargs : dict, optional
        kwargs that will be supplied to the importer if not None

    Returns
    -------
    assets : asset, list of assets, or tuple of (assets, importer)
        The asset or list of assets found in the filepath. If
        `keep_importer` is `True` then the importer is returned as well.

    Raises
    ------
    ValueError
        If ``filepath`` is not a file, or no suitable importer exists for it.
    """
    path = Path(_norm_path(filepath))
    if not path.is_file():
        raise ValueError("{} is not a file".format(path))
    # below could raise ValueError as well...
    importer = importer_for_filepath(path, extensions_map,
                                     importer_kwargs=importer_kwargs)
    if asset is not None:
        built_objects = importer.build(asset=asset)
    else:
        built_objects = importer.build()

    # landmarks are iterable so check for list precisely;
    # enforce a list to make processing consistent
    if not isinstance(built_objects, list):
        built_objects = [built_objects]

    # attach path if there is no x.path already.
    for x in built_objects:
        if not hasattr(x, 'path'):
            try:
                x.path = path
            except AttributeError:
                pass  # that's fine! Probably a dict/list from PickleImporter.

    # handle landmarks. Both a landmark extension map and a resolver are
    # needed; a None resolver means "attach no landmarks" rather than a crash.
    if landmark_ext_map is not None and landmark_resolver is not None:
        for x in built_objects:
            lm_paths = landmark_resolver(x)  # use the user's fcn to find paths
            if lm_paths is None:
                continue
            for group_name, lm_path in lm_paths.items():
                lms = _import(lm_path, landmark_ext_map, asset=x)
                # only attach landmarks whose dimensionality matches the asset
                if x.n_dims == lms.n_dims:
                    x.landmarks[group_name] = lms

    # undo list-ification: any single-element result is returned bare
    if len(built_objects) == 1:
        built_objects = built_objects[0]

    if keep_importer:
        return built_objects, importer
    else:
        return built_objects


def _multi_import_generator(filepaths, extensions_map, keep_importers=False,
                            landmark_resolver=same_name,
                            landmark_ext_map=None, importer_kwargs=None):
    r"""
    Generator yielding assets from the filepaths provided.

    Note that if a single file yields multiple assets, each is yielded in
    turn (this function will never yield an iterable of assets in one go).

    Parameters
    ----------
    filepaths : list of strings
        The filepaths to import. Assets are imported in the order given.
    extensions_map : dictionary (String, :class:`menpo.io.base.Importer`)
        A map from extensions to importers. The importers are expected to be
        non-instantiated classes. The extensions are expected to
        contain the leading period eg. `.obj`.
    keep_importers : bool, optional
        If `True`, yield the :class:`menpo.io.base.Importer` for each asset
        as well as the asset.
    landmark_resolver : function, optional
        If not None, this function will be used to find landmarks for each
        asset. The function should take one argument (the asset itself) and
        return a dictionary of the form {'group_name': 'landmark_filepath'}
    landmark_ext_map : dictionary (str, :map:`Importer`), optional
        If not None an attempt will be made to import annotations with
        extensions defined in this mapping. If None, no attempt will be
        made to import annotations.
    importer_kwargs : dict, optional
        kwargs to be supplied to the importer if not None

    Yields
    ------
    asset :
        An asset found at one of the filepaths.
    importer : :class:`menpo.io.base.Importer`
        Only if `keep_importers` is `True`. The importer used for the
        yielded asset.
    """
    importer = None
    for f in filepaths:
        imported = _import(f, extensions_map, keep_importer=keep_importers,
                           landmark_resolver=landmark_resolver,
                           landmark_ext_map=landmark_ext_map,
                           importer_kwargs=importer_kwargs)
        if keep_importers:
            assets, importer = imported
        else:
            assets = imported
        # could be that there are many assets returned from one file.
        # landmarks are iterable so check for list precisely
        if isinstance(assets, list):
            # there are multiple assets, and one importer.
            # -> yield each asset in turn with the shared importer (if
            # requested)
            for asset in assets:
                if keep_importers:
                    yield asset, importer
                else:
                    yield asset
        else:
            # assets is a single item. Rather than checking (again!) for
            # importers, just yield the imported tuple
            yield imported


def _pathlib_glob_for_pattern(pattern, sort=True):
    r"""Glob matching a string path pattern.

    Splits the provided ``pattern`` into a root path for pathlib and a
    subsequent glob pattern to be applied.

    Parameters
    ----------
    pattern : `str`
        Path including glob patterns. If no glob patterns are present and the
        pattern is a dir, a ``'*'`` pattern will be automatically added (i.e.
        the immediate contents of the directory are matched).
    sort : `bool`, optional
        If True, the returned paths will be sorted. If False, no guarantees are
        made about the ordering of the results.

    Returns
    -------
    paths : iterable of `pathlib.Path`
        Paths matching the provided pattern - a sorted list when ``sort`` is
        True, otherwise the iterator returned by ``Path.glob``.

    Raises
    ------
    ValueError
        If the pattern doesn't contain a '*' wildcard and is not a directory
    """
    pattern = _norm_path(pattern)
    gsplit = pattern.split('*', 1)
    if len(gsplit) == 1:
        # no glob provided. Is the provided pattern a dir?
        if Path(pattern).is_dir():
            preglob = pattern
            pattern = '*'
        else:
            raise ValueError('{} is an invalid glob and '
                             'not a dir'.format(pattern))
    else:
        preglob = gsplit[0]
        pattern = '*' + gsplit[1]
    if not os.path.isdir(preglob):
        # the glob pattern is in the middle of a path segment. pair back
        # to the nearest dir and add the remainder to the pattern
        preglob, pattern_prefix = os.path.split(preglob)
        pattern = pattern_prefix + pattern
    p = Path(preglob)
    paths = p.glob(str(pattern))
    if sort:
        paths = sorted(paths)
    return paths


def _possible_suffixes(path):
    r"""
    List the extension strings of ``path`` worth looking up in an extension
    map, most specific first.

    Three spellings are considered: all suffixes joined (``.foo.pkl.gz``),
    the last two suffixes joined (``.pkl.gz`` - needed for compressed
    pickles whose stem itself contains a '.'), and the final suffix alone
    (``.png`` for ``1.1.png``). Each is offered verbatim and then
    lower-cased, so files such as ``IMG.JPG`` or ``a.Jpg`` resolve against
    lower-case extension keys.

    Parameters
    ----------
    path : `pathlib.Path`
        The path whose suffixes are inspected.

    Returns
    -------
    candidates : `list` of `str`
        De-duplicated candidate extensions in lookup order.
    """
    suffixes = path.suffixes
    spellings = (''.join(suffixes), ''.join(suffixes[-2:]), path.suffix)
    candidates = []
    for spelling in spellings:
        for variant in (spelling, spelling.lower()):
            if variant not in candidates:
                candidates.append(variant)
    return candidates


def glob_with_suffix(pattern, extensions_map, sort=True):
    r"""
    Filters the results from the glob pattern passed in to only those files
    that have an importer given in `extensions_map`.

    The extension test is shared with :func:`importer_for_filepath` (see
    :func:`_possible_suffixes`), so every path yielded here can be resolved
    to an importer, regardless of the letter case of its extension.

    Parameters
    ----------
    pattern : string
        A UNIX style glob pattern to match against.
    extensions_map : dictionary (String, :class:`menpo.io.base.Importer`)
        A map from extensions to importers. The importers are expected to be
        non-instantiated classes. The extensions are expected to
        contain the leading period eg. `.obj`.
    sort : `bool`, optional
        If True, the returned paths will be sorted. If False, no guarantees are
        made about the ordering of the results.

    Yields
    ------
    filepath : `pathlib.Path`
        Each matching path that has a valid extension.
    """
    for path in _pathlib_glob_for_pattern(pattern, sort=sort):
        # We want to recognise '.pkl.gz' as an extension, but the filename
        # might also have a '.' in its stem (e.g. '1.1.png') - hence several
        # candidate spellings are tested.
        if any(suffix in extensions_map
               for suffix in _possible_suffixes(path)):
            yield path


def importer_for_filepath(filepath, extensions_map, importer_kwargs=None):
    r"""
    Given a filepath, return the appropriate importer as mapped by the
    extension map.

    Parameters
    ----------
    filepath : `pathlib.Path`
        The filepath to get importers for
    extensions_map : dictionary (String, :class:`menpo.io.base.Importer`)
        A map from extensions to importers. The importers are expected to be
        a subclass of :class:`Importer`. The extensions are expected to
        contain the leading period eg. `.obj`.
    importer_kwargs : dictionary, optional
        kwargs that will be supplied to the importer if not None.

    Returns
    --------
    importer : :class:`menpo.io.base.Importer` instance
        Importer as found in the `extensions_map` instantiated for the
        filepath provided.

    Raises
    ------
    ValueError
        If none of the candidate extensions of ``filepath`` has an importer.
    """
    importer_type = None
    # Try the most specific extension first (we want to catch cases like
    # '.pkl.gz'), falling back towards the final suffix for files whose stem
    # contains a '.'. Lower-cased spellings are tried too, so mixed or upper
    # case endings resolve.
    for suffix in _possible_suffixes(filepath):
        importer_type = extensions_map.get(suffix)
        if importer_type is not None:
            break
    if importer_type is None:
        raise ValueError("{} does not have a "
                         "suitable importer.".format(filepath.suffix))
    if importer_kwargs is not None:
        return importer_type(str(filepath), **importer_kwargs)
    else:
        return importer_type(str(filepath))


class Importer(object):
    r"""
    Abstract representation of an Importer. Construction of an importer simply
    sets the filepaths etc up. To actually import the object and build a valid
    representation, the `build` method must be called. This allows a set
    of importers to be instantiated but the heavy duty importing to happen
    separately.

    Parameters
    ----------
    filepath : string
        A filepath; it is user-expanded and made absolute on construction.
    """

    def __init__(self, filepath):
        self.filepath = os.path.abspath(os.path.expanduser(filepath))
        # filename without its final extension
        self.filename = os.path.splitext(os.path.basename(self.filepath))[0]
        # final extension only, including the leading period
        self.extension = os.path.splitext(self.filepath)[1]
        self.folder = os.path.dirname(self.filepath)

    def build(self):
        r"""
        Performs the heavy lifting for the importer class. This actually reads
        the file in from disk and does any necessary parsing of the data in to
        an appropriate format.

        Returns
        -------
        object : object or list
            An instantiated class of the expected type. For example, for an
            `.obj` importer, this would be a
            :class:`menpo.shape.mesh.base.Trimesh`. If multiple objects need
            to be returned from one importer, a list must be returned.

        Raises
        ------
        NotImplementedError
            Always; subclasses must override this method.
        """
        raise NotImplementedError()


# Avoid circular imports
from menpo.io.input.extensions import (image_landmark_types, image_types,
                                       pickle_types)