Shortcuts

Source code for mmengine.fileio.io

# Copyright (c) OpenMMLab. All rights reserved.
"""This module provides unified file I/O related functions, which support
operating I/O with different file backends based on the specified filepath or
backend_args.

MMEngine currently supports five file backends:

- LocalBackend
- PetrelBackend
- HTTPBackend
- LmdbBackend
- MemcacheBackend

Note that this module provides a union of the interfaces of all the file
backends above, so NotImplementedError will be raised if the selected file
backend does not implement the requested interface.

There are two ways to call a method of a file backend:

- Initialize a file backend with ``get_file_backend`` and call its methods.
- Directly call unified I/O functions, which will call ``get_file_backend``
  first and then call the corresponding backend method.

Examples:
    >>> # Initialize a file backend and call its methods
    >>> import mmengine.fileio as fileio
    >>> backend = fileio.get_file_backend(backend_args={'backend': 'petrel'})
    >>> backend.get('s3://path/of/your/file')

    >>> # Directly call unified I/O functions
    >>> fileio.get('s3://path/of/your/file')
"""
import json
import warnings
from contextlib import contextmanager
from io import BytesIO, StringIO
from pathlib import Path
from typing import Generator, Iterator, Optional, Tuple, Union

from mmengine.utils import is_filepath, is_str
from .backends import backends, prefix_to_backends
from .file_client import FileClient
# file_handlers and register_handler have been moved to
# mmengine/fileio/handlers/registry_utils. They are imported
# in this file to keep backward compatibility.
from .handlers import file_handlers, register_handler  # noqa: F401

# Module-level cache of backend instances, filled by ``get_file_backend``
# when it is called with ``enable_singleton=True``. Each key is a string
# built from the uri prefix and the JSON-serialized ``backend_args``.
backend_instances: dict = {}


def _parse_uri_prefix(uri: Union[str, Path]) -> str:
    """Parse the prefix of uri.

    Only the text in front of the first ``'://'`` is inspected, so a uri
    that embeds another ``'://'`` later on (for example inside a query
    string) is parsed normally. If that leading text contains ``':'``
    (a cluster name), only the part after the last ``':'`` is returned.

    Args:
        uri (str or Path): Uri to be parsed that contains the file prefix.

    Examples:
        >>> _parse_uri_prefix('/home/path/of/your/file')
        ''
        >>> _parse_uri_prefix('s3://path/of/your/file')
        's3'
        >>> _parse_uri_prefix('clusterName:s3://path/of/your/file')
        's3'
        >>> _parse_uri_prefix('http://host/file?next=http://other/file')
        'http'

    Returns:
        str: Return the prefix of uri if the uri contains '://'. Otherwise,
        return ''.
    """
    assert is_filepath(uri)
    uri = str(uri)
    # ``partition`` splits on the first '://' only and never raises, unlike
    # unpacking the result of ``split`` which fails when '://' occurs twice.
    prefix, separator, _ = uri.partition('://')
    if not separator:
        # if uri does not contain '://', the uri will be handled by
        # LocalBackend by default
        return ''
    # In the case of PetrelBackend, the prefix may contain the cluster
    # name like clusterName:s3://path/of/your/file. Keep only the text
    # after the last ':' (the whole prefix when there is no ':').
    return prefix.rpartition(':')[2]


def _get_file_backend(prefix: str, backend_args: dict):
    """Instantiate a file backend selected by name or by uri prefix.

    When ``backend_args`` has a ``'backend'`` entry, that name selects the
    backend class from ``backends`` and the remaining entries are passed as
    keyword arguments. Otherwise the class is looked up in
    ``prefix_to_backends`` with ``prefix`` and receives all of
    ``backend_args`` as keyword arguments.

    Args:
        prefix (str): Prefix of uri.
        backend_args (dict): Arguments to instantiate the corresponding
            backend. The dict itself is never modified.

    Returns:
        The newly created backend instance.
    """
    if 'backend' not in backend_args:
        return prefix_to_backends[prefix](**backend_args)

    # backend name has a higher priority than the prefix. Build a filtered
    # copy so that the caller's dict is left untouched.
    init_kwargs = {
        key: value
        for key, value in backend_args.items() if key != 'backend'
    }
    backend_cls = backends[backend_args['backend']]
    return backend_cls(**init_kwargs)


def get_file_backend(
    uri: Union[str, Path, None] = None,
    *,
    backend_args: Optional[dict] = None,
    enable_singleton: bool = False,
):
    """Return a file backend based on the prefix of uri or backend_args.

    Args:
        uri (str or Path): Uri to be parsed that contains the file prefix.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.
        enable_singleton (bool): Whether to enable the singleton pattern.
            If it is True, the backend created will be reused if the
            signature is same with the previous one. The signature is made
            of the uri prefix and ``backend_args`` serialized as JSON with
            sorted keys, so the order in which ``backend_args`` was filled
            does not matter. Defaults to False.

    Returns:
        BaseStorageBackend: Instantiated Backend object.

    Raises:
        ValueError: If ``uri`` is None and ``backend_args`` does not
            contain the key ``'backend'``.

    Examples:
        >>> # get file backend based on the prefix of uri
        >>> uri = 's3://path/of/your/file'
        >>> backend = get_file_backend(uri)
        >>> # get file backend based on the backend_args
        >>> backend = get_file_backend(backend_args={'backend': 'petrel'})
        >>> # backend name has a higher priority if 'backend' in backend_args
        >>> backend = get_file_backend(uri, backend_args={'backend': 'petrel'})
    """
    if backend_args is None:
        backend_args = {}

    if uri is None and 'backend' not in backend_args:
        raise ValueError(
            'uri should not be None when "backend" does not exist in '
            'backend_args')

    prefix = _parse_uri_prefix(uri) if uri is not None else ''

    if not enable_singleton:
        return _get_file_backend(prefix, backend_args)

    # sort_keys makes the cache key independent of dict insertion order, so
    # {'a': 1, 'b': 2} and {'b': 2, 'a': 1} share a single backend instance.
    unique_key = f'{prefix}:{json.dumps(backend_args, sort_keys=True)}'
    if unique_key in backend_instances:
        return backend_instances[unique_key]

    backend = _get_file_backend(prefix, backend_args)
    backend_instances[unique_key] = backend
    return backend
def get(
    filepath: Union[str, Path],
    backend_args: Optional[dict] = None,
) -> bytes:
    """Read bytes from a given ``filepath`` with 'rb' mode.

    The backend is obtained from :func:`get_file_backend` with
    ``enable_singleton=True`` and its ``get`` method does the reading.

    Args:
        filepath (str or Path): Path to read data.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.

    Returns:
        bytes: Expected bytes object.

    Examples:
        >>> filepath = '/path/of/file'
        >>> get(filepath)
        b'hello world'
    """
    file_backend = get_file_backend(
        filepath, backend_args=backend_args, enable_singleton=True)
    content = file_backend.get(filepath)
    return content
def get_text(
    filepath: Union[str, Path],
    encoding='utf-8',
    backend_args: Optional[dict] = None,
) -> str:
    """Read text from a given ``filepath`` with 'r' mode.

    The backend is obtained from :func:`get_file_backend` with
    ``enable_singleton=True``; ``filepath`` and ``encoding`` are forwarded
    to its ``get_text`` method.

    Args:
        filepath (str or Path): Path to read data.
        encoding (str): The encoding format used to open the ``filepath``.
            Defaults to 'utf-8'.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.

    Returns:
        str: Expected text reading from ``filepath``.

    Examples:
        >>> filepath = '/path/of/file'
        >>> get_text(filepath)
        'hello world'
    """
    file_backend = get_file_backend(
        filepath, backend_args=backend_args, enable_singleton=True)
    text = file_backend.get_text(filepath, encoding)
    return text
def put(
    obj: bytes,
    filepath: Union[str, Path],
    backend_args: Optional[dict] = None,
) -> None:
    """Write bytes to a given ``filepath`` with 'wb' mode.

    The write is delegated to the ``put`` method of the backend returned
    by :func:`get_file_backend` (``enable_singleton=True``).

    Note:
        ``put`` should create a directory if the directory of
        ``filepath`` does not exist.

    Args:
        obj (bytes): Data to be written.
        filepath (str or Path): Path to write data.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.

    Examples:
        >>> filepath = '/path/of/file'
        >>> put(b'hello world', filepath)
    """
    get_file_backend(
        filepath, backend_args=backend_args,
        enable_singleton=True).put(obj, filepath)
def put_text(
    obj: str,
    filepath: Union[str, Path],
    backend_args: Optional[dict] = None,
    *,
    encoding: str = 'utf-8',
) -> None:
    """Write text to a given ``filepath`` with 'w' mode.

    The write is delegated to the ``put_text`` method of the backend
    returned by :func:`get_file_backend` (``enable_singleton=True``).

    Note:
        ``put_text`` should create a directory if the directory of
        ``filepath`` does not exist.

    Args:
        obj (str): Data to be written.
        filepath (str or Path): Path to write data.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.
        encoding (str, optional): The encoding format used to open the
            ``filepath``. Keyword-only, so that existing positional calls
            ``put_text(obj, filepath, backend_args)`` keep working.
            Defaults to 'utf-8'.

    Examples:
        >>> filepath = '/path/of/file'
        >>> put_text('hello world', filepath)
        >>> put_text('hello world', filepath, encoding='gbk')
    """
    backend = get_file_backend(
        filepath, backend_args=backend_args, enable_singleton=True)
    # Forward the encoding positionally, mirroring how ``get_text`` hands
    # its encoding to the backend.
    backend.put_text(obj, filepath, encoding)
def exists(
    filepath: Union[str, Path],
    backend_args: Optional[dict] = None,
) -> bool:
    """Check whether a file path exists.

    The check is delegated to the ``exists`` method of the backend
    returned by :func:`get_file_backend` (``enable_singleton=True``).

    Args:
        filepath (str or Path): Path to be checked whether exists.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.

    Returns:
        bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise.

    Examples:
        >>> filepath = '/path/of/file'
        >>> exists(filepath)
        True
    """
    return get_file_backend(
        filepath, backend_args=backend_args,
        enable_singleton=True).exists(filepath)
def isdir(
    filepath: Union[str, Path],
    backend_args: Optional[dict] = None,
) -> bool:
    """Check whether a file path is a directory.

    The check is delegated to the ``isdir`` method of the backend
    returned by :func:`get_file_backend` (``enable_singleton=True``).

    Args:
        filepath (str or Path): Path to be checked whether it is a
            directory.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.

    Returns:
        bool: Return ``True`` if ``filepath`` points to a directory,
        ``False`` otherwise.

    Examples:
        >>> filepath = '/path/of/dir'
        >>> isdir(filepath)
        True
    """
    return get_file_backend(
        filepath, backend_args=backend_args,
        enable_singleton=True).isdir(filepath)
def isfile(
    filepath: Union[str, Path],
    backend_args: Optional[dict] = None,
) -> bool:
    """Check whether a file path is a file.

    The check is delegated to the ``isfile`` method of the backend
    returned by :func:`get_file_backend` (``enable_singleton=True``).

    Args:
        filepath (str or Path): Path to be checked whether it is a file.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.

    Returns:
        bool: Return ``True`` if ``filepath`` points to a file, ``False``
        otherwise.

    Examples:
        >>> filepath = '/path/of/file'
        >>> isfile(filepath)
        True
    """
    return get_file_backend(
        filepath, backend_args=backend_args,
        enable_singleton=True).isfile(filepath)
def join_path(
    filepath: Union[str, Path],
    *filepaths: Union[str, Path],
    backend_args: Optional[dict] = None,
) -> Union[str, Path]:
    r"""Concatenate all file paths.

    Join one or more filepath components intelligently. The return value
    is the concatenation of filepath and any members of \*filepaths.

    The backend is inferred from the first component, ``filepath``, and
    the joining itself is done by that backend's ``join_path`` method.

    Args:
        filepath (str or Path): Path to be concatenated.
        *filepaths (str or Path): Other paths to be concatenated.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.

    Returns:
        str: The result of concatenation.

    Examples:
        >>> filepath1 = '/path/of/dir1'
        >>> filepath2 = 'dir2'
        >>> filepath3 = 'path/of/file'
        >>> join_path(filepath1, filepath2, filepath3)
        '/path/of/dir1/dir2/path/of/file'
    """
    file_backend = get_file_backend(
        filepath, backend_args=backend_args, enable_singleton=True)
    joined = file_backend.join_path(filepath, *filepaths)
    return joined
@contextmanager
def get_local_path(
    filepath: Union[str, Path],
    backend_args: Optional[dict] = None,
) -> Generator[Union[str, Path], None, None]:
    """Download data from ``filepath`` and write the data to local path.

    ``get_local_path`` is decorated by
    :meth:`contextlib.contextmanager`. It can be called with ``with``
    statement, and when exiting from the ``with`` statement, the temporary
    path will be released.

    Note:
        If the ``filepath`` is a local path, just return itself and it
        will not be released (removed).

    Args:
        filepath (str or Path): Path to be read data. It is converted to
            ``str`` before being handed to the backend.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.

    Yields:
        Iterable[str]: Only yield one path.

    Examples:
        >>> with get_local_path('s3://bucket/abc.jpg') as path:
        ...     # do something here
    """
    file_backend = get_file_backend(
        filepath, backend_args=backend_args, enable_singleton=True)
    path_str = str(filepath)
    # Stay inside the backend's own context manager while the caller's
    # ``with`` body runs, so the backend does its cleanup afterwards.
    with file_backend.get_local_path(path_str) as tmp_path:
        yield tmp_path
def copyfile(
    src: Union[str, Path],
    dst: Union[str, Path],
    backend_args: Optional[dict] = None,
) -> Union[str, Path]:
    """Copy a file src to dst and return the destination file.

    src and dst should have the same prefix. If dst specifies a directory,
    the file will be copied into dst using the base filename from src. If
    dst specifies a file that already exists, it will be replaced.

    The backend is inferred from ``src`` and its ``copyfile`` method
    performs the copy.

    Args:
        src (str or Path): A file to be copied.
        dst (str or Path): Copy file to dst.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.

    Returns:
        str: The destination file.

    Raises:
        SameFileError: If src and dst are the same file, a SameFileError
            will be raised.

    Examples:
        >>> # dst is a file
        >>> src = '/path/of/file'
        >>> dst = '/path1/of/file1'
        >>> # src will be copied to '/path1/of/file1'
        >>> copyfile(src, dst)
        '/path1/of/file1'

        >>> # dst is a directory
        >>> dst = '/path1/of/dir'
        >>> # src will be copied to '/path1/of/dir/file'
        >>> copyfile(src, dst)
        '/path1/of/dir/file'
    """
    file_backend = get_file_backend(
        src, backend_args=backend_args, enable_singleton=True)
    destination = file_backend.copyfile(src, dst)
    return destination
def copytree(
    src: Union[str, Path],
    dst: Union[str, Path],
    backend_args: Optional[dict] = None,
) -> Union[str, Path]:
    """Recursively copy an entire directory tree rooted at src to a
    directory named dst and return the destination directory.

    src and dst should have the same prefix and dst must not already
    exist. The backend is inferred from ``src`` and its ``copytree``
    method performs the copy.

    Args:
        src (str or Path): A directory to be copied.
        dst (str or Path): Copy directory to dst.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.

    Returns:
        str: The destination directory.

    Raises:
        FileExistsError: If dst had already existed, a FileExistsError
            will be raised.

    Examples:
        >>> src = '/path/of/dir1'
        >>> dst = '/path/of/dir2'
        >>> copytree(src, dst)
        '/path/of/dir2'
    """
    file_backend = get_file_backend(
        src, backend_args=backend_args, enable_singleton=True)
    destination = file_backend.copytree(src, dst)
    return destination
def copyfile_from_local(
    src: Union[str, Path],
    dst: Union[str, Path],
    backend_args: Optional[dict] = None,
) -> Union[str, Path]:
    """Copy a local file src to dst and return the destination file.

    The backend is inferred from ``dst`` and its ``copyfile_from_local``
    method performs the copy.

    Note:
        If the backend is the instance of LocalBackend, it does the same
        thing with :func:`copyfile`.

    Args:
        src (str or Path): A local file to be copied.
        dst (str or Path): Copy file to dst.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.

    Returns:
        str: If dst specifies a directory, the file will be copied into
        dst using the base filename from src.

    Examples:
        >>> # dst is a file
        >>> src = '/path/of/file'
        >>> dst = 's3://openmmlab/mmengine/file1'
        >>> # src will be copied to 's3://openmmlab/mmengine/file1'
        >>> copyfile_from_local(src, dst)
        's3://openmmlab/mmengine/file1'

        >>> # dst is a directory
        >>> dst = 's3://openmmlab/mmengine'
        >>> # src will be copied to 's3://openmmlab/mmengine/file'
        >>> copyfile_from_local(src, dst)
        's3://openmmlab/mmengine/file'
    """
    target_backend = get_file_backend(
        dst, backend_args=backend_args, enable_singleton=True)
    destination = target_backend.copyfile_from_local(src, dst)
    return destination
def copytree_from_local(
    src: Union[str, Path],
    dst: Union[str, Path],
    backend_args: Optional[dict] = None,
) -> Union[str, Path]:
    """Recursively copy an entire directory tree rooted at src to a
    directory named dst and return the destination directory.

    The backend is inferred from ``dst`` and its ``copytree_from_local``
    method performs the copy.

    Note:
        If the backend is the instance of LocalBackend, it does the same
        thing with :func:`copytree`.

    Args:
        src (str or Path): A local directory to be copied.
        dst (str or Path): Copy directory to dst.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.

    Returns:
        str: The destination directory.

    Examples:
        >>> src = '/path/of/dir'
        >>> dst = 's3://openmmlab/mmengine/dir'
        >>> copytree_from_local(src, dst)
        's3://openmmlab/mmengine/dir'
    """
    target_backend = get_file_backend(
        dst, backend_args=backend_args, enable_singleton=True)
    destination = target_backend.copytree_from_local(src, dst)
    return destination
def copyfile_to_local(
    src: Union[str, Path],
    dst: Union[str, Path],
    backend_args: Optional[dict] = None,
) -> Union[str, Path]:
    """Copy the file src to local dst and return the destination file.

    If dst specifies a directory, the file will be copied into dst using
    the base filename from src. If dst specifies a file that already
    exists, it will be replaced.

    The backend is inferred from ``src``, the possibly remote side of the
    copy, and its ``copyfile_to_local`` method performs the copy. A
    ``'backend'`` entry in ``backend_args`` still takes priority over the
    prefix of ``src``.

    Note:
        If the backend is the instance of LocalBackend, it does the same
        thing with :func:`copyfile`.

    Args:
        src (str or Path): A file to be copied.
        dst (str or Path): Copy file to local dst.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.

    Returns:
        str: If dst specifies a directory, the file will be copied into
        dst using the base filename from src.

    Examples:
        >>> # dst is a file
        >>> src = 's3://openmmlab/mmengine/file'
        >>> dst = '/path/of/file'
        >>> # src will be copied to '/path/of/file'
        >>> copyfile_to_local(src, dst)
        '/path/of/file'

        >>> # dst is a directory
        >>> dst = '/path/of/dir'
        >>> # src will be copied to '/path/of/dir/file'
        >>> copyfile_to_local(src, dst)
        '/path/of/dir/file'
    """
    # ``dst`` is always a local path here, so inferring the backend from it
    # would hand a remote ``src`` to the local backend. The source decides
    # which backend has to read the data.
    backend = get_file_backend(
        src, backend_args=backend_args, enable_singleton=True)
    return backend.copyfile_to_local(src, dst)
def copytree_to_local(
    src: Union[str, Path],
    dst: Union[str, Path],
    backend_args: Optional[dict] = None,
) -> Union[str, Path]:
    """Recursively copy an entire directory tree rooted at src to a local
    directory named dst and return the destination directory.

    The backend is inferred from ``src``, the possibly remote side of the
    copy, and its ``copytree_to_local`` method performs the copy. A
    ``'backend'`` entry in ``backend_args`` still takes priority over the
    prefix of ``src``.

    Note:
        If the backend is the instance of LocalBackend, it does the same
        thing with :func:`copytree`.

    Args:
        src (str or Path): A directory to be copied.
        dst (str or Path): Copy directory to local dst.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.

    Returns:
        str: The destination directory.

    Examples:
        >>> src = 's3://openmmlab/mmengine/dir'
        >>> dst = '/path/of/dir'
        >>> copytree_to_local(src, dst)
        '/path/of/dir'
    """
    # ``dst`` is always a local path here, so inferring the backend from it
    # would hand a remote ``src`` to the local backend. The source decides
    # which backend has to read the data.
    backend = get_file_backend(
        src, backend_args=backend_args, enable_singleton=True)
    return backend.copytree_to_local(src, dst)
def remove(
    filepath: Union[str, Path],
    backend_args: Optional[dict] = None,
) -> None:
    """Remove a file.

    The removal is delegated to the ``remove`` method of the backend
    returned by :func:`get_file_backend` (``enable_singleton=True``).

    Args:
        filepath (str, Path): Path to be removed.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.

    Raises:
        FileNotFoundError: If filepath does not exist, a FileNotFoundError
            will be raised.
        IsADirectoryError: If filepath is a directory, an IsADirectoryError
            will be raised.

    Examples:
        >>> filepath = '/path/of/file'
        >>> remove(filepath)
    """
    get_file_backend(
        filepath, backend_args=backend_args,
        enable_singleton=True).remove(filepath)
def rmtree(
    dir_path: Union[str, Path],
    backend_args: Optional[dict] = None,
) -> None:
    """Recursively delete a directory tree.

    The deletion is delegated to the ``rmtree`` method of the backend
    returned by :func:`get_file_backend` (``enable_singleton=True``).

    Args:
        dir_path (str or Path): A directory to be removed.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.

    Examples:
        >>> dir_path = '/path/of/dir'
        >>> rmtree(dir_path)
    """
    get_file_backend(
        dir_path, backend_args=backend_args,
        enable_singleton=True).rmtree(dir_path)
def list_dir_or_file(
    dir_path: Union[str, Path],
    list_dir: bool = True,
    list_file: bool = True,
    suffix: Optional[Union[str, Tuple[str]]] = None,
    recursive: bool = False,
    backend_args: Optional[dict] = None,
) -> Iterator[str]:
    """Scan a directory to find the interested directories or files in
    arbitrary order.

    This is a generator function: the backend is only looked up once
    iteration starts, and every item comes from the backend's own
    ``list_dir_or_file`` method.

    Note:
        :meth:`list_dir_or_file` returns the path relative to ``dir_path``.

    Args:
        dir_path (str or Path): Path of the directory.
        list_dir (bool): List the directories. Defaults to True.
        list_file (bool): List the path of files. Defaults to True.
        suffix (str or tuple[str], optional): File suffix that we are
            interested in. Defaults to None.
        recursive (bool): If set to True, recursively scan the directory.
            Defaults to False.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.

    Yields:
        Iterable[str]: A relative path to ``dir_path``.

    Examples:
        >>> dir_path = '/path/of/dir'
        >>> # list those files and directories in current directory
        >>> for file_path in list_dir_or_file(dir_path):
        ...     print(file_path)
        >>> # only list files
        >>> for file_path in list_dir_or_file(dir_path, list_dir=False):
        ...     print(file_path)
        >>> # only list directories
        >>> for file_path in list_dir_or_file(dir_path, list_file=False):
        ...     print(file_path)
        >>> # only list files ending with specified suffixes
        >>> for file_path in list_dir_or_file(dir_path, suffix='.txt'):
        ...     print(file_path)
        >>> # list all files and directory recursively
        >>> for file_path in list_dir_or_file(dir_path, recursive=True):
        ...     print(file_path)
    """
    file_backend = get_file_backend(
        dir_path, backend_args=backend_args, enable_singleton=True)
    # The options are forwarded positionally, in the same order as this
    # function's own signature.
    entries = file_backend.list_dir_or_file(dir_path, list_dir, list_file,
                                            suffix, recursive)
    yield from entries
def generate_presigned_url(
    url: str,
    client_method: str = 'get_object',
    expires_in: int = 3600,
    backend_args: Optional[dict] = None,
) -> str:
    """Generate the presigned url of video stream which can be passed to
    mmcv.VideoReader.

    The work is delegated to the ``generate_presigned_url`` method of the
    backend returned by :func:`get_file_backend`
    (``enable_singleton=True``).

    Note:
        Now only work on Petrel backend.

    Args:
        url (str): Url of video stream.
        client_method (str): Method of client, 'get_object' or
            'put_object'. Defaults to 'get_object'.
        expires_in (int): expires, in seconds. Defaults to 3600.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.

    Returns:
        str: Generated presigned url.
    """
    file_backend = get_file_backend(
        url, backend_args=backend_args, enable_singleton=True)
    presigned_url = file_backend.generate_presigned_url(
        url, client_method, expires_in)
    return presigned_url
def load(file,
         file_format=None,
         file_client_args=None,
         backend_args=None,
         **kwargs):
    """Load data from json/yaml/pickle files.

    This method provides a unified api for loading data from serialized
    files.

    ``load`` supports loading data from serialized files that can be stored
    in different backends.

    Note:
        The "pickle/pkl" format is among the supported formats.
        Unpickling data from an untrusted source can execute arbitrary
        code, so only load pickle files that you trust.

    Args:
        file (str or :obj:`Path` or file-like object): Filename or a
            file-like object.
        file_format (str, optional): If not specified, the file format will
            be inferred from the file extension, otherwise use the
            specified one. Currently supported formats include "json",
            "yaml/yml" and "pickle/pkl".
        file_client_args (dict, optional): Arguments to instantiate a
            FileClient. See :class:`mmengine.fileio.FileClient` for
            details. Defaults to None. It will be deprecated in future.
            Please use ``backend_args`` instead.
        backend_args (dict, optional): Arguments to instantiate the
            prefix of uri corresponding backend. Defaults to None.
            New in v0.2.0.
        **kwargs: Extra keyword arguments forwarded to the handler's
            ``load_from_fileobj``.

    Examples:
        >>> load('/path/of/your/file')  # file is stored in disk
        >>> load('https://path/of/your/file')  # file is stored in Internet
        >>> load('s3://path/of/your/file')  # file is stored in petrel

    Returns:
        The content from the file.

    Raises:
        TypeError: If the file format is not supported, or if ``file`` is
            neither a filepath nor an object with a ``read`` attribute.
        ValueError: If both ``file_client_args`` and ``backend_args`` are
            set.
    """
    if isinstance(file, Path):
        file = str(file)
    if file_format is None and is_str(file):
        # infer the format from the text after the last '.'
        file_format = file.split('.')[-1]
    if file_format not in file_handlers:
        raise TypeError(f'Unsupported format: {file_format}')

    if file_client_args is not None:
        warnings.warn(
            '"file_client_args" will be deprecated in future. '
            'Please use "backend_args" instead', DeprecationWarning)
        if backend_args is not None:
            raise ValueError(
                '"file_client_args" and "backend_args" cannot be set at the '
                'same time.')

    handler = file_handlers[file_format]
    if is_str(file):
        if file_client_args is not None:
            file_backend = FileClient.infer_client(file_client_args, file)
        else:
            file_backend = get_file_backend(file, backend_args=backend_args)

        # Fetch the whole content first, then let the handler parse it from
        # an in-memory buffer of the matching kind (text or bytes).
        if handler.str_like:
            with StringIO(file_backend.get_text(file)) as f:
                obj = handler.load_from_fileobj(f, **kwargs)
        else:
            with BytesIO(file_backend.get(file)) as f:
                obj = handler.load_from_fileobj(f, **kwargs)
    elif hasattr(file, 'read'):
        obj = handler.load_from_fileobj(file, **kwargs)
    else:
        raise TypeError('"file" must be a filepath str or a file-object')
    return obj
def dump(obj,
         file=None,
         file_format=None,
         file_client_args=None,
         backend_args=None,
         **kwargs):
    """Dump data to json/yaml/pickle strings or files.

    This method provides a unified api for dumping data as strings or to
    files, and also supports custom arguments for each file format.

    ``dump`` supports dumping data as strings or to files which are saved
    to different backends.

    Args:
        obj (any): The python object to be dumped.
        file (str or :obj:`Path` or file-like object, optional): If not
            specified, then the object is dumped to a str, otherwise to a
            file specified by the filename or file-like object.
        file_format (str, optional): Same as :func:`load`.
        file_client_args (dict, optional): Arguments to instantiate a
            FileClient. See :class:`mmengine.fileio.FileClient` for
            details. Defaults to None. It will be deprecated in future.
            Please use ``backend_args`` instead.
        backend_args (dict, optional): Arguments to instantiate the
            prefix of uri corresponding backend. Defaults to None.
            New in v0.2.0.
        **kwargs: Extra keyword arguments forwarded to the handler's
            ``dump_to_str`` or ``dump_to_fileobj``.

    Examples:
        >>> dump('hello world', '/path/of/your/file')  # disk
        >>> dump('hello world', 's3://path/of/your/file')  # ceph or petrel

    Returns:
        The result of the handler's ``dump_to_str`` when ``file`` is None;
        otherwise None.

    Raises:
        ValueError: If ``file`` and ``file_format`` are both None, or if
            both ``file_client_args`` and ``backend_args`` are set.
        TypeError: If the file format is not supported, or if ``file`` is
            neither a filename nor an object with a ``write`` attribute.
    """
    if isinstance(file, Path):
        file = str(file)

    if file_format is None:
        if is_str(file):
            # infer the format from the text after the last '.'
            file_format = file.split('.')[-1]
        elif file is None:
            raise ValueError(
                'file_format must be specified since file is None')
    if file_format not in file_handlers:
        raise TypeError(f'Unsupported format: {file_format}')

    if file_client_args is not None:
        warnings.warn(
            '"file_client_args" will be deprecated in future. '
            'Please use "backend_args" instead', DeprecationWarning)
        if backend_args is not None:
            raise ValueError(
                '"file_client_args" and "backend_args" cannot be set at the '
                'same time.')

    handler = file_handlers[file_format]

    # No target given: serialize to a string and hand it back.
    if file is None:
        return handler.dump_to_str(obj, **kwargs)

    if is_str(file):
        if file_client_args is None:
            storage = get_file_backend(file, backend_args=backend_args)
        else:
            storage = FileClient.infer_client(file_client_args, file)

        # Serialize into an in-memory buffer of the matching kind, then
        # write the complete payload through the backend.
        if handler.str_like:
            with StringIO() as text_buffer:
                handler.dump_to_fileobj(obj, text_buffer, **kwargs)
                storage.put_text(text_buffer.getvalue(), file)
        else:
            with BytesIO() as byte_buffer:
                handler.dump_to_fileobj(obj, byte_buffer, **kwargs)
                storage.put(byte_buffer.getvalue(), file)
    elif hasattr(file, 'write'):
        handler.dump_to_fileobj(obj, file, **kwargs)
    else:
        raise TypeError('"file" must be a filename str or a file-object')

© Copyright 2022, mmengine contributors. Revision 66fb81f7.

Built with Sphinx using a theme provided by Read the Docs.
Read the Docs v: latest
Versions
latest
stable
v0.10.3
v0.10.2
v0.10.1
v0.10.0
v0.9.1
v0.9.0
v0.8.5
v0.8.4
v0.8.3
v0.8.2
v0.8.1
v0.8.0
v0.7.4
v0.7.3
v0.7.2
v0.7.1
v0.7.0
v0.6.0
v0.5.0
v0.4.0
v0.3.0
v0.2.0
Downloads
epub
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.