Source code for torch.distributed
from __future__ import absolute_import, division, print_function, unicode_literals
import torch
import sys
def is_available():
    return (hasattr(torch._C, "_c10d_init") and hasattr(torch._C, "_rpc_init")
            and hasattr(torch._C, "_dist_autograd_init"))

if is_available() and not (torch._C._c10d_init() and torch._C._rpc_init()
                           and torch._C._dist_autograd_init()):
    raise RuntimeError("Failed to initialize PyTorch distributed support")
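
# Usage note (illustrative sketch, not part of the original module): callers
# typically guard distributed-only code paths on this check, since the C
# extension hooks probed above may be absent from some builds, e.g.:
#
#     import torch.distributed as dist
#     if dist.is_available():
#         dist.init_process_group("gloo", init_method="tcp://127.0.0.1:23456",
#                                 rank=0, world_size=2)
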
if is_available():
    from .distributed_c10d import *  # noqa: F401
    # Variables prefixed with underscore are not auto imported
    # See the comment in `distributed_c10d.py` above `_backend` on why we expose
    # this.
    from .distributed_c10d import _backend  # noqa: F401
    if sys.version_info >= (3, 0):
        from . import autograd  # needed for the autograd._init() call below
        from .rpc import _init_rpc
        from .rpc import *  # noqa: F401
        def init_model_parallel(self_name,
                                backend=RpcBackend.PROCESS_GROUP,
                                self_rank=-1,
                                init_method=None,
                                num_send_recv_threads=4):
            r"""
            Initializes model parallel primitives such as the local RPC agent
            and distributed autograd.

            Initializes the local RPC agent, which immediately makes the
            current process ready to send and receive RPCs. The caller needs
            to make sure the specified backend is properly initialized before
            calling this method. For example, to use the ``pg``
            (ProcessGroup) backend, ``init_process_group`` must be invoked
            prior to this method.

            Arguments:
                self_name (str): a globally unique name of this node (e.g.,
                    ``Trainer3``, ``ParameterServer2``, ``Master``,
                    ``Worker1``). The name can only contain letters, digits,
                    underscores, and dashes, and must be shorter than 128
                    characters.
                backend (Enum): type of RPC backend implementation. Currently,
                    the process group backend is the only available backend
                    implementation (default: ``RpcBackend.PROCESS_GROUP``).
                self_rank (int): a globally unique id/rank of this node.
                init_method (str): backend-specific init arguments.
                num_send_recv_threads (int): number of threads used for
                    send/recv work.
            """
            _init_rpc(backend, self_name, self_rank, init_method,
                      num_send_recv_threads)

            from .rpc import _agent
            autograd._init(_agent.get_worker_id().id)
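
# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal sketch of driving this API on one worker of a two-process job
# with the ProcessGroup backend; the worker name, address, ranks, and world
# size below are hypothetical.
#
#     import torch.distributed as dist
#
#     # The ProcessGroup RPC backend builds on an initialized process group,
#     # so init_process_group must be called first (see the docstring above).
#     dist.init_process_group(
#         backend="gloo",
#         init_method="tcp://127.0.0.1:23456",
#         rank=0,
#         world_size=2,
#     )
#
#     # Bring up the local RPC agent and initialize distributed autograd.
#     dist.init_model_parallel(
#         self_name="worker0",
#         self_rank=0,
#         init_method="tcp://127.0.0.1:23456",
#     )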