diff --git a/README.md b/README.md
index 6a9c5356d..e64269596 100644
--- a/README.md
+++ b/README.md
@@ -12,15 +12,15 @@ No need to clone the huge PyTorch repo. No need to install Sphinx. No need to wa
| Docs | Version | Release Page |
| ------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
-| PyTorch | [![torch version](https://img.shields.io/badge/torch_version-v1.7.0-282828.svg?labelColor=4F4F4F&logo=PyTorch)](https://pytorch.org/blog/pytorch-1.7-released/) | [Link](https://github.com/unknownue/PyTorch.docs/releases/tag/v1.7.0) |
-| torchvision | [![torchvision version](https://img.shields.io/badge/torchvision_version-v0.8.0-282828.svg?labelColor=4F4F4FF&logo=PyTorch)](https://github.com/pytorch/vision/releases/tag/v0.8.0) | [Link](https://github.com/unknownue/PyTorch.docs/releases/tag/v1.7.0) |
+| PyTorch | [![torch version](https://img.shields.io/badge/torch_version-v1.8.0-282828.svg?labelColor=4F4F4F&logo=PyTorch)](https://pytorch.org/blog/pytorch-1.8-released/) | [Link](https://github.com/unknownue/PyTorch.docs/releases/tag/v1.8.0) |
+| torchvision | [![torchvision version](https://img.shields.io/badge/torchvision_version-v0.9.0-282828.svg?labelColor=4F4F4F&logo=PyTorch)](https://github.com/pytorch/vision/releases/tag/v0.9.0) | [Link](https://github.com/unknownue/PyTorch.docs/releases/tag/v1.8.0) |
| Numpy | [![numpy version](https://badgen.net/badge/NumPy%20version/v1.19.0/black?icon=dockbit)](https://numpy.org/doc/1.19/release.html) | [Link](https://numpy.org/doc) |
| Scikit-learn | [![scikit-learn version](https://badgen.net/badge/Scikit-learn%20version/v0.22/black?icon=libraries)](https://github.com/scikit-learn/scikit-learn/releases/tag/0.23.2) | [Link](https://github.com/unknownue/PyTorch.docs/releases/tag/v1.7.1) |
| Matplotlib | [![matplotlib version](https://badgen.net/badge/Matplotlib%20version/v3.3.1/black?icon=graphql)](https://github.com/matplotlib/matplotlib/releases/tag/v3.3.1) | [Link](https://github.com/unknownue/PyTorch.docs/releases/tag/v1.6.1) |
## How to use
-You can download from [release page](https://github.com/unknownue/PyTorch.docs/releases)(recommended), or clone this repo(about 300+MB) by
+You can download the documentation from the [release page](https://github.com/unknownue/PyTorch.docs/releases) (recommended), or clone this repo (about 700+ MB) with:
```shell
$ git clone https://github.com/unknownue/PyTorch.docs.git
@@ -30,4 +30,4 @@ The documentation of PyTorch is in `torch` directory, and that of torchvision is
Open `Index.html` to view the documentation.
-If you want to build by yourself, the `build` directory contains the build configuration in docker.
\ No newline at end of file
+If you want to build by yourself, the `build` directory contains the build configuration in docker.
diff --git a/torch/__config__.html b/torch/__config__.html
index 399377b40..35c504a73 100644
--- a/torch/__config__.html
+++ b/torch/__config__.html
@@ -31,7 +31,7 @@
-
+
@@ -60,6 +60,7 @@
+
import platform
import textwrap
import ctypes
+import warnings

if sys.version_info < (3,):
    raise Exception("Python 2 has reached end-of-life and is no longer supported by PyTorch.")
@@ -363,7 +401,11 @@
Source code for torch
from ._utils import _import_dotted_name
from ._utils_internal import get_file_path, prepare_multiprocessing_environment, \
    USE_RTLD_GLOBAL_WITH_LIBTORCH, USE_GLOBAL_DEPS
-from .version import __version__
+# TODO(torch_deploy) figure out how to freeze version.py in fbcode build
+if sys.executable == 'torch_deploy':
+    __version__ = "torch-deploy-1.8"
+else:
+    from .version import __version__

from ._six import string_classes as _string_classes
from typing import Set, Type, TYPE_CHECKING
@@ -377,7 +419,8 @@
[docs]def is_tensor(obj):
    r"""Returns True if `obj` is a PyTorch tensor.

    Note that this function is simply doing ``isinstance(obj, Tensor)``.
@@ -601,16 +644,16 @@
Source code for torch
Args: obj (Object): Object to test """
-    return isinstance(obj, torch.Tensor)
+    return isinstance(obj, torch.Tensor)

-def is_storage(obj):
+
[docs]def is_storage(obj):
    r"""Returns True if `obj` is a PyTorch storage object.

    Args:
        obj (Object): Object to test
    """
-    return type(obj) in _storage_classes
+    return type(obj) in _storage_classes
"""_C._set_default_dtype(d)
-defset_deterministic(d):
+defuse_deterministic_algorithms(d):r""" Sets whether PyTorch operations must use "deterministic" algorithms. That is, algorithms which, given the same input, and when run on the same software and hardware, always produce the same output.
@@ -689,6 +732,10 @@
Source code for torch
* :class:`torch.nn.ConvTranspose2d` when called on CUDA tensor * :class:`torch.nn.ConvTranspose3d` when called on CUDA tensor * :func:`torch.bmm` when called on sparse-dense CUDA tensors
+ * :func:`torch.__getitem__` backward when `self` is a CPU tensor and
+ ``indices`` is a list of tensors
+ * :func:`torch.index_put` with ``accumulate=True`` when called on a CPU
+ tensor The following normally-nondeterministic operations will throw a :class:`RuntimeError` when `d=True`:
@@ -701,11 +748,13 @@
Source code for torch
* :class:`torch.nn.FractionalMaxPool2d` when called on a CUDA tensor that requires grad * :class:`torch.nn.FractionalMaxPool3d` when called on a CUDA tensor that requires grad * :func:`torch.nn.functional.interpolate` when called on a CUDA tensor that requires grad
- and one of the following modes is used:
- - `linear`
- - `bilinear`
- - `bicubic`
- - `trilinear`
+ and one of the following modes is used:
+
+ - `linear`
+ - `bilinear`
+ - `bicubic`
+ - `trilinear`
+
* :class:`torch.nn.ReflectionPad1d` when called on a CUDA tensor that requires grad * :class:`torch.nn.ReflectionPad2d` when called on a CUDA tensor that requires grad * :class:`torch.nn.ReplicationPad1d` when called on a CUDA tensor that requires grad
@@ -716,10 +765,13 @@
Source code for torch
* :class:`torch.nn.EmbeddingBag` when called on a CUDA tensor that requires grad * :func:`torch.scatter_add_` when called on a CUDA tensor * :func:`torch.index_add_` when called on a CUDA tensor
+ * :func:`torch.index_copy` * :func:`torch.index_select` when called on a CUDA tensor that requires grad * :func:`torch.repeat_interleave` when called on a CUDA tensor that requires grad * :func:`torch.histc` when called on a CUDA tensor * :func:`torch.bincount` when called on a CUDA tensor
+    * :func:`torch.kthvalue` when called on a CUDA tensor
+ * :func:`torch.median` with indices output when called on a CUDA tensor A handful of CUDA operations are nondeterministic if the CUDA version is 10.2 or greater, unless the environment variable `CUBLAS_WORKSPACE_CONFIG=:4096:8`
@@ -739,13 +791,33 @@
Source code for torch
d (:class:`bool`): If True, force operations to be deterministic. If False, allow non-deterministic operations. """
- _C._set_deterministic(d)
+ _C._set_deterministic_algorithms(d)
-def is_deterministic():
+def set_deterministic(d):
+    r"""This function is deprecated and will be removed in a future release.
+    Please use :func:`torch.use_deterministic_algorithms` instead.
+    """
+    warnings.warn((
+        "torch.set_deterministic is deprecated and will be removed in a future "
+        "release. Please use torch.use_deterministic_algorithms instead"))
+
+    use_deterministic_algorithms(d)
+
+def are_deterministic_algorithms_enabled():
+    r"""Returns True if the global deterministic flag is turned on. Refer to
-    :func:`torch.set_deterministic` documentation for more details.
+    :func:`torch.use_deterministic_algorithms` documentation for more details.
+    """
+    return _C._get_deterministic_algorithms()
+
+def is_deterministic():
+    r"""This function is deprecated and will be removed in a future release.
+    Please use :func:`torch.are_deterministic_algorithms_enabled` instead.
    """
-    return _C._get_deterministic()
+    warnings.warn((
+        "torch.is_deterministic is deprecated and will be removed in a future "
+        "release. Please use torch.are_deterministic_algorithms_enabled instead"))
+    return are_deterministic_algorithms_enabled()
+
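The hunk above renames the deterministic-mode API and keeps deprecation shims. A minimal usage sketch, assuming the torch 1.8 API shown in this diff:

```python
import torch

# New names in 1.8: force deterministic algorithm selection globally.
torch.use_deterministic_algorithms(True)
print(torch.are_deterministic_algorithms_enabled())   # True

# Old names still work, but warn about the rename before forwarding.
torch.set_deterministic(False)                         # -> use_deterministic_algorithms(False)
print(torch.is_deterministic())                        # False
```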
################################################################################# Define Storage and Tensor classes
@@ -809,11 +881,13 @@
Source code for torch
class QInt32Storage(_C.QInt32StorageBase, _StorageBase):
    pass
+class QUInt4x2Storage(_C.QUInt4x2StorageBase, _StorageBase):
+    pass

_storage_classes = {
    DoubleStorage, FloatStorage, LongStorage, IntStorage, ShortStorage,
    CharStorage, ByteStorage, HalfStorage, BoolStorage, QUInt8Storage, QInt8Storage,
-    QInt32Storage, BFloat16Storage, ComplexFloatStorage, ComplexDoubleStorage
+    QInt32Storage, BFloat16Storage, ComplexFloatStorage, ComplexDoubleStorage, QUInt4x2Storage
}

# The _tensor_classes set is initialized by the call to _C._initialize_tensor_type_bindings()
@@ -829,7 +903,7 @@
# These were previously defined in native_functions.yaml and appeared on the# `torch` namespace, but we moved them to c10 dispatch to facilitate custom
-# class usage. We add these lines here to preserve backward compatbility.
+# class usage. We add these lines here to preserve backward compatibility.
quantized_lstm = torch.ops.aten.quantized_lstm
quantized_gru = torch.ops.aten.quantized_gru
-
-
-def Assert(condition, message):
-    r"""A wrapper around Python's assert which is symbolically traceable.
-    """
-    from .overrides import has_torch_function, handle_torch_function
-
-    if type(condition) is not torch.Tensor and has_torch_function((condition,)):
-        return handle_torch_function(Assert, (condition,), condition, message)
-    assert condition, message
# TODO: In principle, we could provide more structured version/config
-# information here. We're not for now; considering doing so if someone
-# asks for it.
+# information here. For now only CXX_FLAGS is exposed, as Timer
+# uses them.
+def _cxx_flags():
+    """Returns the CXX_FLAGS used when building PyTorch."""
+    return torch._C._cxx_flags()

[docs]def parallel_info():
    r"""Returns detailed string with parallelization settings"""
@@ -580,10 +620,6 @@
import collections
import enum
import inspect
+import ast
import weakref
import warnings
+from textwrap import dedent
import torch
import sys  # This is needed. `torch._jit_internal` is imported before `torch.distributed.__init__`.
@@ -422,7 +461,7 @@
Source code for torch._jit_internal
        value, len_parsed = parseNestedExpr(expr, module)
        assert len_parsed == len(expr), "whole expression was not parsed, falling back to c++ parser"
        return value
-    except Exception as e:
+    except Exception:
        """
        The python resolver fails in several cases in known unit tests, and is
        intended to fall back gracefully to the c++ resolver in general. For example, python 2 style
@@ -536,6 +575,9 @@
Source code for torch._jit_internal
# functions will be defined at a global scope like MyGlobalClass. In cases# where they are not, it is possible to work around issues by declaring the# values global in the function.
+# In Python 3.9 declaring class as global will make it invisible to
+# `inspect.getsource`, see https://bugs.python.org/issue42666 .
+# This could be worked around by manually adding it to the `globals()` dictionary.
@@ -570,6 +612,98 @@
Source code for torch._jit_internal
returnall(has_code)
+defget_annotation_str(annotation):
+ """
+ Convert an AST node containing a type annotation to the string present in the source
+ that represents the same annotation.
+ """
+ ifisinstance(annotation,ast.Name):
+ returnannotation.id
+ elifisinstance(annotation,ast.Attribute):
+ return'.'.join([get_annotation_str(annotation.value),annotation.attr])
+ elifisinstance(annotation,ast.Subscript):
+        # In Python 3.9+ subscript indices are not wrapped in ast.Index
+        subscript_slice = annotation.slice if sys.version_info >= (3, 9) else annotation.slice.value  # type: ignore
+ returnf"{get_annotation_str(annotation.value)}[{get_annotation_str(subscript_slice)}]"
+ elifisinstance(annotation,ast.Tuple):
+ return','.join([get_annotation_str(elt)foreltinannotation.elts])
+ elifisinstance(annotation,ast.Constant)orisinstance(annotation,ast.NameConstant):
+ returnf"{annotation.value}"
+
+ # If an AST node is not handled here, it's probably handled in ScriptTypeParser.
+ returnNone
+
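The helper above is internal to `torch._jit_internal`; the sketch below is a hypothetical standalone mirror of the same idea (the `annotation_str` name is mine), assuming only CPython's `ast` module:

```python
import ast
import sys
from textwrap import dedent

def annotation_str(node):
    # Turn an AST annotation node back into the source string it came from.
    if isinstance(node, ast.Name):
        return node.id
    if isinstance(node, ast.Attribute):
        return f"{annotation_str(node.value)}.{node.attr}"
    if isinstance(node, ast.Subscript):
        # In Python 3.9+ the subscript is stored directly, not wrapped in ast.Index.
        inner = node.slice if sys.version_info >= (3, 9) else node.slice.value
        return f"{annotation_str(node.value)}[{annotation_str(inner)}]"
    if isinstance(node, ast.Tuple):
        return ", ".join(annotation_str(e) for e in node.elts)
    if isinstance(node, ast.Constant):
        return f"{node.value}"
    return None  # anything else is left to the downstream type parser

src = dedent("""
def f(x: torch.Tensor, shape: Tuple[int, int]) -> Optional[device_t]:
    pass
""")
fn = ast.parse(src).body[0]
print([annotation_str(a.annotation) for a in fn.args.args])  # ['torch.Tensor', 'Tuple[int, int]']
print(annotation_str(fn.returns))                            # 'Optional[device_t]'
```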
+
+defget_type_hint_captures(fn):
+ """
+ Get a dictionary containing type resolution mappings necessary to resolve types
+ for the literal annotations on 'fn'. These are not considered to be closed-over by fn
+ and must be obtained separately (e.g. using this function).
+
+ Args:
+ fn: A callable.
+ Returns:
+ A Dict[str, Any] containing a mapping from the literal annotations used on
+ fn to the Python objects they refer to.
+ """
+ # Gather a dictionary of parameter name -> type, skipping any parameters whose annotated
+ # types are strings. These are only understood by TorchScript in the context of a type annotation
+ # that refers to a class in its own definition, but trying to include a mapping for this in the result
+ # function would cause infinite recursion because the class is currently being compiled.
+ # In addition, there is logic in ScriptTypeParser to handle this.
+ signature=inspect.signature(fn)
+ name_to_type={
+ name:parameter.annotation
+ forname,parameterinsignature.parameters.items()
+ ifparameter.annotationisnotinspect.Parameter.emptyandnotisinstance(parameter.annotation,str)
+ }
+
+ # Then, get the literal type annotations from the function declaration
+ # by source inspection. This accounts for the case in which aliases are used
+ # to annotate the arguments (e.g device_t = torch.device, and then d: device_t).
+ src=inspect.getsource(fn)
+
+ # frontend.py cannot be used here because it includes _jit_internal, so use ast instead.
+ a=ast.parse(dedent(src))
+ iflen(a.body)!=1ornotisinstance(a.body[0],ast.FunctionDef):
+ raiseRuntimeError(f"Expected {fn} to be a function")
+ f=a.body[0]
+
+ # Prepare a dictionary of source annotation -> type, which will be the final result of this function,
+ # by using the parsed AST (f) to reconstruct source annotations as strings for each parameter and mapping
+ # them to the type object corresponding to the annotation via name_to_type using the parameter name.
+ annotation_to_type={}
+
+ forarginf.args.args:
+ # Get the source type annotation string for this argument if possible.
+ arg_annotation_str=get_annotation_str(arg.annotation)ifarg.annotationelseNone
+
+ # If the argument has no annotation or get_annotation_str cannot convert it to a string,
+ # arg_annotation_str will be None. Skip this arg; ScriptTypeParser will probably handle
+ # this in the latter case.
+ ifarg_annotation_strisNone:
+ continue
+
+ # Insert {arg_annotation_str: type} into annotation_to_type if possible. One reason arg_name may not
+ # be present in name_to_type is that the annotation itself is a string and not a type object
+        # (common for self-referential annotations in classes). Once again, let ScriptTypeParser handle this.
+ arg_name=arg.arg
+ ifarg_nameinname_to_type:
+ annotation_to_type[arg_annotation_str]=name_to_type[arg_name]
+
+ # If there is a valid return annotation, include it in annotation_to_type. As with argument annotations,
+ # the literal annotation has to be convertible to a string by get_annotation_str, and the actual type
+ # of the annotation cannot be a string.
+ literal_return_annotation=get_annotation_str(f.returns)
+ valid_literal_annotation=literal_return_annotationisnotNone
+ return_annotation=signature.return_annotation
+ valid_return_annotation_type=return_annotationisnotinspect.Parameter.emptyandnotisinstance(return_annotation,str)
+ ifvalid_literal_annotationandvalid_return_annotation_type:
+ annotation_to_type[literal_return_annotation]=return_annotation
+
+ returnannotation_to_type
+
+
defcreateResolutionCallbackForClassMethods(cls):""" This looks at all the methods defined in a class and pulls their closed-over
@@ -582,6 +716,7 @@
[docs]defunused(fn):
+defunused(fn):""" This decorator indicates to the compiler that a function or method should be ignored and replaced with the raising of an exception. This allows you
@@ -710,9 +845,9 @@
defis_tuple(ann):ifannisTuple:
- raiseRuntimeError(
- "Attempted to use Tuple without a "
- "contained type. Please add a contained type, e.g. "
- "Tuple[int]"
- )
+ raise_error_container_parameter_missing("Tuple")# For some reason Python 3.7 violates the Type[A, B].__origin__ == Type ruleifnothasattr(ann,'__module__'):
@@ -996,11 +1128,7 @@
Source code for torch._jit_internal
defis_list(ann):ifannisList:
- raiseRuntimeError(
- "Attempted to use List without a "
- "contained type. Please add a contained type, e.g. "
- "List[int]"
- )
+ raise_error_container_parameter_missing("List")ifnothasattr(ann,'__module__'):returnFalse
@@ -1010,11 +1138,7 @@
Source code for torch._jit_internal
defis_dict(ann):ifannisDict:
- raiseRuntimeError(
- "Attempted to use Dict without "
- "contained types. Please add contained type, e.g. "
- "Dict[int, int]"
- )
+ raise_error_container_parameter_missing("Dict")ifnothasattr(ann,'__module__'):returnFalse
@@ -1024,11 +1148,7 @@
Source code for torch._jit_internal
defis_optional(ann):ifannisOptional:
- raiseRuntimeError(
- "Attempted to use Optional without a "
- "contained type. Please add a contained type, e.g. "
- "Optional[int]"
- )
+ raise_error_container_parameter_missing("Optional")# Optional[T] is just shorthand for Union[T, None], so check for bothdefsafe_is_subclass(the_type,super_type):
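These hunks only route the "bare container annotation" errors through a shared helper; the user-visible behavior stays the same. A hedged sketch of what triggers it, assuming `torch.jit.script` still rejects an un-parameterized `List`:

```python
import torch
from typing import List

def bad(xs: List) -> int:   # bare List: no contained type given
    return len(xs)

try:
    torch.jit.script(bad)
except RuntimeError as e:
    # The message asks for a contained type, e.g. List[int]
    print(e)
```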
@@ -1077,7 +1197,7 @@
Source code for torch._jit_internal
defis_final(ann):returnann.__module__in{'typing','typing_extensions'}and \
- (getattr(ann,'__origin__',None)isFinal)
+ (getattr(ann,'__origin__',None)isFinalorisinstance(ann,type(Final)))# allows BroadcastingList instance to be subscriptableclassBroadcastingListCls(object):
@@ -1091,7 +1211,7 @@
[docs]defis_scripting():r""" Function that returns True when in compilation and False otherwise. This is useful especially with the @unused decorator to leave code in your
@@ -1110,7 +1230,7 @@
""" A generic method for computing poly(x) using the Horner's rule.
- Arguments:
+ Args: poly (Tensor): the (possibly batched) 1D Tensor representing polynomial coefficients such that poly[..., i] = (a_{i_0}, ..., a{i_n} (==1)), and
@@ -606,7 +643,7 @@
we do the following symmetrization map: `A -> (A + A.t()) / 2`. The map is performed only when the `A` requires gradients.
- Arguments:
+ Args: A (Tensor): the input tensor of size :math:`(*, m, m)`
@@ -950,7 +987,7 @@
matrix product `D M` with element-wise product `M * d`. Also, creating the diagonal matrix `D` is avoided.
- Arguments:
+ Args: S (Tensor): the matrix basis for the search subspace, size is :math:`(m, n)`.
@@ -1301,7 +1338,7 @@
Source code for torch._lobpcg
modification of the corresponding algorithm introduced in [StathopolousWu2002].
- Arguments:
+ Args: U (Tensor) : initial approximation, size is (m, n) drop (bool) : when True, drop columns that
@@ -1367,7 +1404,7 @@
Source code for torch._lobpcg
.. note:: If all U columns are B-collinear to V then the returned tensor U will be empty.
- Arguments:
+ Args: U (Tensor) : initial approximation, size is (m, n) V (Tensor) : B-orthogonal external basis, size is (m, k)
@@ -1681,10 +1718,6 @@
__all__=['svd_lowrank','pca_lowrank']
-fromtypingimportTuple,Optional
-
-importtorchfromtorchimportTensor
+importtorchfrom.import_linalg_utilsas_utils
-from._overridesimporthas_torch_function,handle_torch_function
+from.overridesimporthas_torch_function,handle_torch_function
+fromtypingimportOptional,Tuple
-defget_approximate_basis(A,# type: Tensor
- q,# type: int
- niter=2,# type: Optional[int]
- M=None# type: Optional[Tensor]
- ):
- # type: (...) -> Tensor
+defget_approximate_basis(A:Tensor,
+ q:int,
+ niter:Optional[int]=2,
+ M:Optional[Tensor]=None
+ )->Tensor:"""Return tensor :math:`Q` with :math:`q` orthonormal columns such that :math:`Q Q^H A` approximates :math:`A`. If :math:`M` is specified, then :math:`Q` is such that :math:`Q Q^H (A - M)`
@@ -381,7 +416,7 @@
Source code for torch._lowrank
.. note:: To obtain repeatable results, reset the seed for the pseudorandom number generator
- Arguments::
+    Args:
        A (Tensor): the input tensor of size :math:`(*, m, n)`
        q (int): the dimension of subspace spanned by :math:`Q`
@@ -426,8 +461,8 @@
Source code for torch._lowrank
returnQ
-
[docs]defsvd_lowrank(A,q=6,niter=2,M=None):
- # type: (Tensor, Optional[int], Optional[int], Optional[Tensor]) -> Tuple[Tensor, Tensor, Tensor]
+defsvd_lowrank(A:Tensor,q:Optional[int]=6,niter:Optional[int]=2,
+ M:Optional[Tensor]=None)->Tuple[Tensor,Tensor,Tensor]:r"""Return the singular value decomposition ``(U, S, V)`` of a matrix, batches of matrices, or a sparse matrix :math:`A` such that :math:`A \approx U diag(S) V^T`. In case :math:`M` is given, then
@@ -447,7 +482,7 @@
Source code for torch._lowrank
will be useful for huge sparse matrices that ``torch.svd`` cannot handle.
- Arguments::
+    Args:
        A (Tensor): the input tensor of size :math:`(*, m, n)`
        q (int, optional): a slightly overestimated rank of A.
@@ -464,18 +499,18 @@
Source code for torch._lowrank
structure with randomness: probabilistic algorithms for constructing approximate matrix decompositions, arXiv:0909.4061 [math.NA; math.PR], 2009 (available at
- `arXiv <http://arxiv.org/abs/0909.4061>`_).
+ `arXiv <https://arxiv.org/abs/0909.4061>`_). """ifnottorch.jit.is_scripting():tensor_ops=(A,M)if(notset(map(type,tensor_ops)).issubset((torch.Tensor,type(None)))andhas_torch_function(tensor_ops)):returnhandle_torch_function(svd_lowrank,tensor_ops,A,q=q,niter=niter,M=M)
- return_svd_lowrank(A,q=q,niter=niter,M=M)
# Algorithm 5.1 in Halko et al 2009, slightly modified to reduce# the number conjugate and transpose operations
- ifm<n:
- # computing the SVD approximation of a transpose in order to
- # keep B shape minimal
+ ifm<norn>q:
+ # computing the SVD approximation of a transpose in
+ # order to keep B shape minimal (the m < n case) or the V
+ # shape small (the n > q case)Q=get_approximate_basis(A_t,q,niter=niter,M=M_t)Q_c=_utils.conjugate(Q)ifMisNone:B_t=matmul(A,Q_c)else:B_t=matmul(A,Q_c)-matmul(M,Q_c)
+ assertB_t.shape[-2]==m,(B_t.shape,m)
+ assertB_t.shape[-1]==q,(B_t.shape,q)
+ assertB_t.shape[-1]<=B_t.shape[-2],B_t.shapeU,S,V=torch.svd(B_t)V=Q.matmul(V)else:
@@ -505,14 +544,18 @@
[docs]defpca_lowrank(A:Tensor,q:Optional[int]=None,center:bool=True,
+ niter:int=2)->Tuple[Tensor,Tensor,Tensor]:r"""Performs linear Principal Component Analysis (PCA) on a low-rank matrix, batches of such matrices, or sparse matrix.
@@ -547,7 +590,7 @@
Source code for torch._lowrank
.. note:: To obtain repeatable results, reset the seed for the pseudorandom number generator
- Arguments:
+ Args: A (Tensor): the input tensor of size :math:`(*, m, n)`
@@ -611,11 +654,8 @@
else:returntorch.stack([get_summarized_data(x)forxinself])
-def_str_intern(self):
+def_str_intern(inp):prefix='tensor('indent=len(prefix)suffixes=[]
+ # This is used to extract the primal value and thus disable the forward AD
+ # within this function.
+ # TODO(albanD) This needs to be updated when more than one level is supported
+ self,tangent=torch.autograd.forward_ad.unpack_dual(inp)
+
# Note [Print tensor device]:# A general logic here is we only print device when it doesn't match# the device specified in default tensor type.
@@ -661,7 +704,8 @@
ifself.layout!=torch.strided:suffixes.append('layout='+str(self.layout))
- ifself.grad_fnisnotNone:
- name=type(self.grad_fn).__name__
+ # Use inp here to get the original grad_fn and not the one generated by the forward grad
+ # unpacking.
+ ifinp.grad_fnisnotNone:
+ name=type(inp.grad_fn).__name__ifname=='CppFunction':
- name=self.grad_fn.name().rsplit('::',1)[-1]
+ name=inp.grad_fn.name().rsplit('::',1)[-1]suffixes.append('grad_fn=<{}>'.format(name))
- elifself.requires_grad:
+ elifinp.requires_grad:suffixes.append('requires_grad=True')ifself.has_names():suffixes.append('names={}'.format(self.names))
+ iftangentisnotNone:
+ suffixes.append('tangent={}'.format(tangent))
+
return_add_suffixes(prefix+tensor_str,suffixes,indent,force_newline=self.is_sparse)def_str(self):
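The printing change above unpacks dual tensors so a forward-mode tangent shows up as a `tangent=` suffix. A hedged sketch (forward-mode AD is a beta API in 1.8, and the exact repr may differ):

```python
import torch
import torch.autograd.forward_ad as fwAD

primal = torch.tensor([1.0, 2.0])
tangent = torch.tensor([1.0, 0.0])

with fwAD.dual_level():
    dual = fwAD.make_dual(primal, tangent)
    # repr() goes through _str_intern above: the primal values are printed,
    # with the tangent appended as a suffix.
    print(dual)  # tensor([1., 2.], tangent=tensor([1., 0.]))
```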
@@ -934,10 +983,6 @@
importtorch
+importtorch._six
+fromtypingimportOptional,List,DefaultDictimportwarningsfromcollectionsimportdefaultdictimportsysimporttraceback
+
def_type(self,dtype=None,non_blocking=False,**kwargs):"""Returns the type if `dtype` is not provided, else casts this object to the specified type.
@@ -378,9 +418,9 @@
returntensor
+_sparse_tensors_to_validate:List["torch.Tensor"]=[]
+
+# In _legacy_load() in serialization.py we unpickle storages after the sparse
+# tensors have been already unpickled. Those storages contain data necessary for
+# validating sparse tensors: indices and values. That's why sparse tensors are
+# first unpickled without any validation, and then this function is called just
+# before _legacy_load() returns, so that all the sparse tensors can be validated
+# in bulk.
+#
+# The same procedure must be followed by _load() in serialization.py because due
+# to Pickler semantics, we have to use the same (non-validating) function for
+# unpickling sparse tensors, regardless of the caller.
+def_validate_loaded_sparse_tensors():
+ try:
+ fortin_sparse_tensors_to_validate:
+ torch._validate_sparse_coo_tensor_args(t._indices(),t._values(),
+ t.size())
+ finally:
+ _sparse_tensors_to_validate.clear()
+
def_rebuild_sparse_tensor(layout,data):iflayout==torch.sparse_coo:indices,values,size=data
- returntorch.sparse_coo_tensor(indices,values,size)
+ result=torch._sparse_coo_tensor_unsafe(indices,values,size)
+ _sparse_tensors_to_validate.append(result)
+ returnresult
+
raiseNotImplementedError("rebuilding sparse tensor for layout %s"%(layout))
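The change above defers sparse-tensor validation until after unpickling, when the indices/values storages are available. Round-tripping a COO tensor through `torch.save`/`torch.load` exercises it (a minimal sketch):

```python
import io
import torch

i = torch.tensor([[0, 1, 1], [2, 0, 2]])
v = torch.tensor([3.0, 4.0, 5.0])
sparse = torch.sparse_coo_tensor(i, v, (2, 3))

buf = io.BytesIO()
torch.save(sparse, buf)
buf.seek(0)

# During load the tensor is rebuilt unvalidated and then checked in bulk by
# _validate_loaded_sparse_tensors() before the load returns.
restored = torch.load(buf)
print(restored.to_dense())
```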
@@ -503,11 +566,15 @@
buffer. Element-wise operation on this buffer will be equivalent to operating individually.
- Arguments:
+ Args: tensors (Iterable[Tensor]): dense tensors to flatten. Returns:
@@ -578,15 +645,15 @@
Source code for torch._utils
"""Flatten sparse tensors into two contiguous 1D buffers, one of indices and one of values. Assume tensors are of same sparse type.
- Arguments:
+ Args: tensors (Iterable[Tensor]): sparse tensors to flatten. Returns: A tuple of two contiguous 1D buffers, one containing input tensors' indices and the other containing the values. """
- flat_indices=_flatten_dense_tensors([torch._indices(t)fortintensors])
- flat_values=_flatten_dense_tensors([torch._values(t)fortintensors])
+ flat_indices=_flatten_dense_tensors([torch.Tensor._indices(t)fortintensors])
+ flat_values=_flatten_dense_tensors([torch.Tensor._values(t)fortintensors])returnflat_indices,flat_values
@@ -594,7 +661,7 @@
Source code for torch._utils
"""View a flat buffer using the sizes of tensors. Assume that tensors are of same dense type, and that flat is given by _flatten_dense_tensors.
- Arguments:
+ Args: flat (Tensor): flattened dense tensors to unflatten. tensors (Iterable[Tensor]): dense tensors whose sizes will be used to unflatten flat.
@@ -617,7 +684,7 @@
Source code for torch._utils
tensors. Assume that tensors are of same sparse type, and that flat is given by _flatten_sparse_tensors.
- Arguments:
+ Args: flat (tuple(Tensor, Tensor)): flattened indices and values of sparse tensors to unflatten. tensors (Iterable[Tensor]): sparse tensors whose sizes will be used to
@@ -628,8 +695,8 @@
types, e.g., from _take_tensors. Reorder them to be of same order as ordered_tensors.
- Arguments:
+ Args: tensors (Iterable[Tensor]): tensors to be reordered. They should be of the same order as ordered_tensors within their own types. ordered_tensors (Iterable[Tensor]): tensors whose order will be the
@@ -654,8 +721,8 @@
Blocks of tensors of same type and within size_limit. The yielded tensors are only ordered as the original sequence within its types. """
- buf_dict=defaultdict(lambda:[[],0])
+ buf_dict:DefaultDict[str,List]=defaultdict(lambda:[[],0])fortensorintensors:t=tensor.type()iftensor.is_sparse:
- indices=torch._indices(tensor)
- values=torch._values(tensor)
+ indices=torch.Tensor._indices(tensor)
+ values=torch.Tensor._values(tensor)size=indices.numel()*indices.element_size()+values.numel()*values.element_size()else:size=tensor.numel()*tensor.element_size()
@@ -736,7 +803,85 @@
Source code for torch._utils
# makes stack traces unreadable. It will not be changed in Python# (https://bugs.python.org/issue2651), so we work around it.msg=KeyErrorMessage(msg)
+ elifgetattr(self.exc_type,"message",None):
+ # Some exceptions have first argument as non-str but explicitly
+ # have message field
+ raiseself.exc_type(message=msg)raiseself.exc_type(msg)
+
+
+def_get_available_device_type():
+ iftorch.cuda.is_available():
+ return"cuda"
+ # add more available device types here
+ returnNone
+
+
+def_get_device_attr(get_member):
+ device_type=_get_available_device_type()
+ ifdevice_typeanddevice_type.lower()=="cuda":
+ returnget_member(torch.cuda)
+ # add more available device types here
+ returnNone
+
+
+def_get_current_device_index():
+ # current device index
+ return_get_device_attr(lambdam:m.current_device())
+
+
+def_get_all_device_indices():
+ # all device index
+ return_get_device_attr(lambdam:list(range(m.device_count())))
+
+
+def_get_devices_properties(device_ids):
+ # all device properties
+ return[_get_device_attr(lambdam:m.get_device_properties(i))foriindevice_ids]
+
+
+def_get_device_index(device,optional=False,allow_cpu=False)->int:
+ r"""Gets the device index from :attr:`device`, which can be a torch.device
+ object, a Python integer, or ``None``.
+
+ If :attr:`device` is a torch.device object, returns the device index if it
+ has index. Note that for a device without a specified index,
+ i.e., ``torch.device('xxx')``, this will return the current default
+ device of that type if :attr:`optional` is ``True``. If :attr:`allow_cpu` is ``True``,
+ CPU devices will be accepted and ``-1`` will be returned in this case.
+
+ If :attr:`device` is a Python integer, it is returned as is.
+
+ If :attr:`device` is ``None``, this will return the current default
+ device of the supported runtime platform if :attr:`optional` is ``True``.
+ i.e., the current default CUDA device will be returned if CUDA runtime is supported.
+ """
+ ifisinstance(device,str):
+ device=torch.device(device)
+ device_idx:Optional[int]
+ device_idx=None
+ ifisinstance(device,torch.device):
+ ifnotallow_cpuanddevice.type=='cpu':
+ raiseValueError('Expected a non cpu device, but got: {}'.format(device))
+ device_idx=-1ifdevice.type=='cpu'elsedevice.index
+ ifisinstance(device,int):
+ device_idx=device
+ ifdevice_idxisNone:
+ ifoptional:
+ device_idx=_get_current_device_index()
+ else:
+ raiseValueError('Expected a torch.device with a specified index '
+ 'or an integer, but got:{}'.format(device))
+ returndevice_idx
+
+
+def_handle_complex(tensor):
+ """
+ Returns a real view of a tensor if complex dtype else just the tensor
+ need to check if a UninitializedParameter because otherwise checking is_complex is an error for a LazyModule
+ """
+ returntorch.view_as_real(tensor)ifnotisinstance(tensor,
+ torch.nn.UninitializedParameter)andtensor.is_complex()elsetensor
+str(grad.shape)+" and output["+str(outputs.index(out))+"] has a shape of "+str(out.shape)+".")
- if(out.dtype.is_complex!=grad.dtype.is_complex):
+ ifout.dtype.is_complex!=grad.dtype.is_complex:raiseRuntimeError("For complex Tensors, both grad_output and output"" are required to have the same dtype."" Mismatch in dtype: grad_output["
@@ -398,12 +438,21 @@
[docs]defbackward(tensors:_TensorOrTensors,grad_tensors:Optional[_TensorOrTensors]=None,retain_graph:Optional[bool]=None,create_graph:bool=False,grad_variables:Optional[_TensorOrTensors]=None,
+ inputs:Optional[Sequence[torch.Tensor]]=None,)->None:r"""Computes the sum of gradients of given tensors w.r.t. graph leaves.
@@ -428,7 +477,13 @@
Source code for torch.autograd
If you have to use this function, make sure to reset the ``.grad`` fields of your parameters to ``None`` after use to break the cycle and avoid the leak.
- Arguments:
+ .. note::
+
+ If you run any forward ops, create ``grad_tensors``, and/or call ``backward``
+ in a user-specified CUDA stream context, see
+ :ref:`Stream semantics of backward passes<bwd-cuda-stream-semantics>`.
+
+ Args: tensors (sequence of Tensor): Tensors of which the derivative will be computed. grad_tensors (sequence of (Tensor or None)): The "vector" in the Jacobian-vector
@@ -443,6 +498,11 @@
Source code for torch.autograd
create_graph (bool, optional): If ``True``, graph of the derivative will be constructed, allowing to compute higher order derivative products. Defaults to ``False``.
+ inputs (sequence of Tensor): Inputs w.r.t. which the gradient will be
+ accumulated into ``.grad``. All other Tensors will be ignored. If not
+ provided, the gradient is accumulated into all the leaf Tensors that were
+            used to compute the :attr:`tensors`. All the provided inputs must be leaf
+ Tensors. """ifgrad_variablesisnotNone:warnings.warn("'grad_variables' is deprecated. Use 'grad_tensors' instead.")
@@ -452,23 +512,20 @@
Source code for torch.autograd
raiseRuntimeError("'grad_tensors' and 'grad_variables' (deprecated) ""arguments both passed to backward(). Please only ""use 'grad_tensors'.")
+ ifinputsisnotNoneandlen(inputs)==0:
+ raiseRuntimeError("'inputs' argument to backward() cannot be empty.")tensors=(tensors,)ifisinstance(tensors,torch.Tensor)elsetuple(tensors)
+ inputs=tuple(inputs)ifinputsisnotNoneelsetuple()
- ifgrad_tensorsisNone:
- grad_tensors=[None]*len(tensors)
- elifisinstance(grad_tensors,torch.Tensor):
- grad_tensors=[grad_tensors]
- else:
- grad_tensors=list(grad_tensors)
-
- grad_tensors=_make_grads(tensors,grad_tensors)
+ grad_tensors_=_tensor_or_tensors_to_tuple(grad_tensors,len(tensors))
+ grad_tensors_=_make_grads(tensors,grad_tensors_)ifretain_graphisNone:retain_graph=create_graphVariable._execution_engine.run_backward(
- tensors,grad_tensors,retain_graph,create_graph,
- allow_unreachable=True)# allow_unreachable flag
+ tensors,grad_tensors_,retain_graph,create_graph,inputs,
+ allow_unreachable=True,accumulate_grad=True)# allow_unreachable flag
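A hedged sketch of the new ``inputs`` argument to ``torch.autograd.backward`` added in this release: gradients are accumulated only into the listed leaves.

```python
import torch

x = torch.randn(3, requires_grad=True)
y = torch.randn(3, requires_grad=True)
loss = (x * y).sum()

# Only x.grad is populated; y is ignored even though it is a leaf.
torch.autograd.backward(loss, inputs=[x])
print(x.grad)   # equals y
print(y.grad)   # None
```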
leaves will still be computed, and will be accumulated into their ``.grad`` attribute.
- Arguments:
+ .. note::
+
+ If you run any forward ops, create ``grad_outputs``, and/or call ``grad``
+ in a user-specified CUDA stream context, see
+ :ref:`Stream semantics of backward passes<bwd-cuda-stream-semantics>`.
+
+ Args: outputs (sequence of Tensor): outputs of the differentiated function. inputs (sequence of Tensor): Inputs w.r.t. which the gradient will be returned (and not accumulated into ``.grad``).
@@ -511,29 +574,36 @@
Source code for torch.autograd
used when computing outputs (and therefore their grad is always zero) is an error. Defaults to ``False``. """
+ outputs=(outputs,)ifisinstance(outputs,torch.Tensor)elsetuple(outputs)
+ inputs=(inputs,)ifisinstance(inputs,torch.Tensor)elsetuple(inputs)
+ overridable_args=outputs+inputs
+ ifhas_torch_function(overridable_args):
+ returnhandle_torch_function(
+ grad,
+ overridable_args,
+ outputs,
+ inputs,
+ grad_outputs=grad_outputs,
+ retain_graph=retain_graph,
+ create_graph=create_graph,
+ only_inputs=only_inputs,
+ allow_unused=allow_unused,
+ )
+
ifnotonly_inputs:warnings.warn("only_inputs argument is deprecated and is ignored now ""(defaults to True). To accumulate gradient for other ""parts of the graph, please use torch.autograd.backward.")
- outputs=(outputs,)ifisinstance(outputs,torch.Tensor)elsetuple(outputs)
- inputs=(inputs,)ifisinstance(inputs,torch.Tensor)elsetuple(inputs)
-
- ifgrad_outputsisNone:
- grad_outputs=[None]*len(outputs)
- elifisinstance(grad_outputs,torch.Tensor):
- grad_outputs=[grad_outputs]
- else:
- grad_outputs=list(grad_outputs)
-
- grad_outputs=_make_grads(outputs,grad_outputs)
+ grad_outputs_=_tensor_or_tensors_to_tuple(grad_outputs,len(outputs))
+ grad_outputs_=_make_grads(outputs,grad_outputs_)ifretain_graphisNone:retain_graph=create_graphreturnVariable._execution_engine.run_backward(
- outputs,grad_outputs,retain_graph,create_graph,
- inputs,allow_unused)
r"""Context-manager that enable anomaly detection for the autograd engine. This does two things:
+
- Running the forward pass with detection enabled will allow the backward
- pass to print the traceback of the forward operation that created the failing
- backward function.
+ pass to print the traceback of the forward operation that created the failing
+ backward function. - Any backward computation that generate "nan" value will raise an error. .. warning::
@@ -435,7 +473,7 @@
Source code for torch.autograd.anomaly_mode
See ``detect_anomaly`` above for details of the anomaly detection behaviour.
- Arguments:
+ Args: mode (bool): Flag whether to enable anomaly detection (``True``), or disable (``False``).
@@ -671,10 +709,6 @@
This is used e.g. for indices returned from a max :class:`Function`. """
- self.non_differentiable=args
+ self.non_differentiable=args
+
[docs]defset_materialize_grads(self,value):
+ r"""Sets whether to materialize output grad tensors. Default is true.
+
+ **This should be called only from inside the** :func:`forward` **method**
+
+ If true, undefined output grad tensors will be expanded to tensors full
+ of zeros prior to calling the :func:`backward` method.
+ """
+ self.materialize_grads=value
class_HookMixin(object):
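``set_materialize_grads`` is new here; a hedged sketch of a custom Function that opts out of zero-materialized grads and therefore handles ``None`` grad outputs itself:

```python
import torch

class TwoOutputs(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        # Opt out of grad materialization: unused output grads arrive in
        # backward() as None instead of zero-filled tensors.
        ctx.set_materialize_grads(False)
        return x * 2, x * 3

    @staticmethod
    def backward(ctx, g1, g2):
        grad = 0
        if g1 is not None:
            grad = grad + 2 * g1
        if g2 is not None:
            grad = grad + 3 * g2
        return grad

x = torch.randn(3, requires_grad=True)
a, b = TwoOutputs.apply(x)
a.sum().backward()        # only `a` is used, so g2 is None in backward
print(x.grad)             # all 2s
```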
@@ -421,7 +468,8 @@
Source code for torch.autograd.function
_is_legacy=Falsedefapply(self,*args):
- returnself._forward_cls.backward(self,*args)
+ # _forward_cls is defined by derived class
+ returnself._forward_cls.backward(self,*args)# type: ignoreclassFunctionMeta(type):
@@ -452,8 +500,8 @@
[docs]classFunction(with_metaclass(FunctionMeta,_C._FunctionBase,_ContextMethodMixin,_HookMixin)):
+# mypy doesn't understand `with_metaclass` from torch._six
+
[docs]classFunction(with_metaclass(FunctionMeta,_C._FunctionBase,_ContextMethodMixin,_HookMixin)):# type: ignorer"""Records operation history and defines formulas for differentiating ops. See the Note on extending the autograd engine for more details on how to use
@@ -564,7 +612,7 @@
Source code for torch.autograd.function
ifnotisinstance(outputs,tuple):outputs=(outputs,)
- err_fn=torch._C._functions.DelayedError(
+ err_fn=_functions.DelayedError(b"trying to differentiate twice a function that was marked"b"with @once_differentiable",len(outputs))
@@ -667,7 +715,7 @@
Source code for torch.autograd.function
# unflatten a list or tuple input into a nested list/tuple structure# specified by protodefunflatten_helper(input,proto):
- res=[]
+ res:List[Optional[torch.Tensor]]=[]ifhasattr(proto,"_jit_wrap"):returnproto._jit_wrap(input)ifnotisinstance(proto,(list,tuple)):
@@ -716,16 +764,16 @@
Defaults to ``False``. Returns:
- vjp (tuple of Tensors or Tensor): result of the dot product with
- the same shape as the inputs.
+ output (tuple): tuple with:
+ func_output (tuple of Tensors or Tensor): output of ``func(inputs)``
+
+ vjp (tuple of Tensors or Tensor): result of the dot product with
+ the same shape as the inputs. Example:
@@ -643,8 +685,11 @@
Source code for torch.autograd.functional
Defaults to ``False``. Returns:
- jvp (tuple of Tensors or Tensor): result of the dot product with
- the same shape as the output.
+ output (tuple): tuple with:
+ func_output (tuple of Tensors or Tensor): output of ``func(inputs)``
+
+ jvp (tuple of Tensors or Tensor): result of the dot product with
+ the same shape as the output. Example:
@@ -709,7 +754,37 @@
[docs]defjacobian(func,inputs,create_graph=False,strict=False):
+def_construct_standard_basis_for(tensors:Tuple[torch.Tensor,...],tensor_numels:Tuple[int,...])->Tuple[torch.Tensor,...]:
+ # This function:
+ # - constructs a N=sum(tensor_numels) standard basis. i.e. an NxN identity matrix.
+ # - Splits the identity matrix into chunks with each chunk size determined by `tensor_numels`.
+ # - Each chunk corresponds to one tensor. The chunk has the same dtype and
+ # device as the tensor
+ #
+ # For example, with tensor_numels = [1, 2, 1], this function returns:
+ # ( tensor([[1], tensor([[0, 0], tensor([[0],
+ # [0], [1, 0], [0],
+ # [0], [0, 1], [0],
+ # [0]]) , [0, 0]]) , [1]]) )
+ #
+ # Precondition: tensor_numels == tuple(tensor.numel() for tensor in tensors)
+ # Precondition: tensors always has at least one element.
+ #
+ # See NOTE: [Computing jacobian with vmap and grad for multiple tensors]
+ # for context behind this function. All the pre-conditions are guarded for
+ # in torch.autograd.functional.jacobian.
+ assertlen(tensors)==len(tensor_numels)
+ assertlen(tensors)>0
+ total_numel=sum(tensor_numels)
+ diag_start_indices=(0,*torch.tensor(tensor_numels).cumsum(dim=0)[:-1].neg().unbind())
+ chunks=tuple(tensor.new_zeros(total_numel,tensor_numel)
+ fortensor,tensor_numelinzip(tensors,tensor_numels))
+ forchunk,diag_start_idxinzip(chunks,diag_start_indices):
+ chunk.diagonal(diag_start_idx).fill_(1)
+ returnchunks
+
+
+
[docs]defjacobian(func,inputs,create_graph=False,strict=False,vectorize=False):r"""Function that computes the Jacobian of a given function. Args:
@@ -725,17 +800,29 @@
Source code for torch.autograd.functional
independent of it. If ``False``, we return a Tensor of zeros as the jacobian for said inputs, which is the expected mathematical value. Defaults to ``False``.
+ vectorize (bool, optional): This feature is experimental, please use at
+ your own risk. When computing the jacobian, usually we invoke
+ ``autograd.grad`` once per row of the jacobian. If this flag is
+ ``True``, we use the vmap prototype feature as the backend to
+ vectorize calls to ``autograd.grad`` so we only invoke it once
+ instead of once per row. This should lead to performance
+ improvements in many use cases, however, due to this feature
+ being incomplete, there may be performance cliffs. Please
+ use `torch._C._debug_only_display_vmap_fallback_warnings(True)`
+ to show any performance warnings and file us issues if
+ warnings exist for your use case. Defaults to ``False``. Returns:
- Jacobian (Tensor or nested tuple of Tensors): if there are a single
- input and output, this will be a single Tensor containing the
- Jacobian for the linearized inputs and output. If one of the two is
- a tuple, then the Jacobian will be a tuple of Tensors. If both of
- them are tuples, then the Jacobian will be a tuple of tuple of
- Tensors where ``Jacobian[i][j]`` will contain the Jacobian of the
- ``i``\th output and ``j``\th input and will have as size the
- concatenation of the sizes of the corresponding output and the
- corresponding input.
+ Jacobian (Tensor or nested tuple of Tensors): if there is a single
+ input and output, this will be a single Tensor containing the
+ Jacobian for the linearized inputs and output. If one of the two is
+ a tuple, then the Jacobian will be a tuple of Tensors. If both of
+ them are tuples, then the Jacobian will be a tuple of tuple of
+ Tensors where ``Jacobian[i][j]`` will contain the Jacobian of the
+ ``i``\th output and ``j``\th input and will have as size the
+ concatenation of the sizes of the corresponding output and the
+ corresponding input and will have same dtype and device as the
+ corresponding input. Example:
@@ -745,14 +832,12 @@
"jacobian")_check_requires_grad(outputs,"outputs",strict=strict)
- jacobian=tuple()
+
+ ifvectorize:
+ ifstrict:
+ raiseRuntimeError('torch.autograd.functional.jacobian: `strict=True` '
+ 'and `vectorized=True` are not supported together. '
+ 'Please either set `strict=False` or '
+ '`vectorize=False`.')
+ # NOTE: [Computing jacobian with vmap and grad for multiple outputs]
+ #
+ # Let's consider f(x) = (x**2, x.sum()) and let x = torch.randn(3).
+ # It turns out we can compute the jacobian of this function with a single
+ # call to autograd.grad by using vmap over the correct grad_outputs.
+ #
+ # Firstly, one way to compute the jacobian is to stack x**2 and x.sum()
+ # into a 4D vector. E.g., use g(x) = torch.stack([x**2, x.sum()])
+ #
+ # To get the first row of the jacobian, we call
+ # >>> autograd.grad(g(x), x, grad_outputs=torch.tensor([1, 0, 0, 0]))
+ # To get the 2nd row of the jacobian, we call
+ # >>> autograd.grad(g(x), x, grad_outputs=torch.tensor([0, 1, 0, 0]))
+ # and so on.
+ #
+ # Using vmap, we can vectorize all 4 of these computations into one by
+ # passing the standard basis for R^4 as the grad_output.
+ # vmap(partial(autograd.grad, g(x), x))(torch.eye(4)).
+ #
+ # Now, how do we compute the jacobian *without stacking the output*?
+ # We can just split the standard basis across the outputs. So to
+ # compute the jacobian of f(x), we'd use
+ # >>> autograd.grad(f(x), x, grad_outputs=_construct_standard_basis_for(...))
+ # The grad_outputs looks like the following:
+ # ( torch.tensor([[1, 0, 0],
+ # [0, 1, 0],
+ # [0, 0, 1],
+ # [0, 0, 0]]),
+ # torch.tensor([[0],
+ # [0],
+ # [0],
+ # [1]]) )
+ #
+ # But we're not done yet!
+ # >>> vmap(partial(autograd.grad(f(x), x, grad_outputs=...)))
+ # returns a Tensor of shape [4, 3]. We have to remember to split the
+ # jacobian of shape [4, 3] into two:
+ # - one of shape [3, 3] for the first output
+ # - one of shape [ 3] for the second output
+
+ # Step 1: Construct grad_outputs by splitting the standard basis
+ output_numels=tuple(output.numel()foroutputinoutputs)
+ grad_outputs=_construct_standard_basis_for(outputs,output_numels)
+ flat_outputs=tuple(output.reshape(-1)foroutputinoutputs)
+
+ # Step 2: Call vmap + autograd.grad
+ defvjp(grad_output):
+ vj=list(_autograd_grad(flat_outputs,inputs,grad_output,create_graph=create_graph))
+ forel_idx,vj_elinenumerate(vj):
+ ifvj_elisnotNone:
+ continue
+ vj[el_idx]=torch.zeros_like(inputs[el_idx])
+ returntuple(vj)
+
+ jacobians_of_flat_output=_vmap(vjp)(grad_outputs)
+
+ # Step 3: The returned jacobian is one big tensor per input. In this step,
+ # we split each Tensor by output.
+ jacobian_input_output=[]
+ forjac,input_iinzip(jacobians_of_flat_output,inputs):
+ jacobian_input_i_output=[]
+ forjac,output_jinzip(jac.split(output_numels,dim=0),outputs):
+ jacobian_input_i_output_j=jac.view(output_j.shape+input_i.shape)
+ jacobian_input_i_output.append(jacobian_input_i_output_j)
+ jacobian_input_output.append(jacobian_input_i_output)
+
+ # Step 4: Right now, `jacobian` is a List[List[Tensor]].
+ # The outer List corresponds to the number of inputs,
+ # the inner List corresponds to the number of outputs.
+ # We need to exchange the order of these and convert to tuples
+ # before returning.
+ jacobian_output_input=tuple(zip(*jacobian_input_output))
+
+ jacobian_output_input=_grad_postprocess(jacobian_output_input,create_graph)
+ return_tuple_postprocess(jacobian_output_input,(is_outputs_tuple,is_inputs_tuple))
+
+ jacobian:Tuple[torch.Tensor,...]=tuple()fori,outinenumerate(outputs):
- jac_i=tuple([]for_inrange(len(inputs)))
+ # mypy complains that expression and variable have different types due to the empty list
+ jac_i:Tuple[List[torch.Tensor]]=tuple([]for_inrange(len(inputs)))# type: ignoreforjinrange(out.nelement()):vj=_autograd_grad((out.reshape(-1)[j],),inputs,retain_graph=True,create_graph=create_graph)
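A hedged usage sketch of the new ``vectorize=True`` path in ``torch.autograd.functional.jacobian`` (experimental in 1.8); it computes the same Jacobian as the row-by-row default, batched through vmap:

```python
import torch
from torch.autograd.functional import jacobian

def f(x):
    return x ** 2 + x.sum()

x = torch.randn(3)

jac_loop = jacobian(f, x)                    # one autograd.grad call per output row
jac_vmap = jacobian(f, x, vectorize=True)    # single vmapped autograd.grad call
print(torch.allclose(jac_loop, jac_vmap))    # True
```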
@@ -807,7 +976,7 @@
[docs]defhessian(func,inputs,create_graph=False,strict=False,vectorize=False):r"""Function that computes the Hessian of a given scalar function. Args:
@@ -822,14 +991,26 @@
Source code for torch.autograd.functional
such that all the outputs are independent of it. If ``False``, we return a Tensor of zeros as the hessian for said inputs, which is the expected mathematical value. Defaults to ``False``.
+ vectorize (bool, optional): This feature is experimental, please use at
+ your own risk. When computing the hessian, usually we invoke
+ ``autograd.grad`` once per row of the hessian. If this flag is
+ ``True``, we use the vmap prototype feature as the backend to
+ vectorize calls to ``autograd.grad`` so we only invoke it once
+ instead of once per row. This should lead to performance
+ improvements in many use cases, however, due to this feature
+ being incomplete, there may be performance cliffs. Please
+ use `torch._C._debug_only_display_vmap_fallback_warnings(True)`
+ to show any performance warnings and file us issues if
+ warnings exist for your use case. Defaults to ``False``. Returns:
- Hessian (Tensor or a tuple of tuple of Tensors) if there are a single input,
- this will be a single Tensor containing the Hessian for the input.
- If it is a tuple, then the Hessian will be a tuple of tuples where
- ``Hessian[i][j]`` will contain the Hessian of the ``i``\th input
- and ``j``\th input with size the sum of the size of the ``i``\th input plus
- the size of the ``j``\th input.
+ Hessian (Tensor or a tuple of tuple of Tensors): if there is a single input,
+ this will be a single Tensor containing the Hessian for the input.
+ If it is a tuple, then the Hessian will be a tuple of tuples where
+ ``Hessian[i][j]`` will contain the Hessian of the ``i``\th input
+ and ``j``\th input with size the sum of the size of the ``i``\th input plus
+ the size of the ``j``\th input. ``Hessian[i][j]`` will have the same
+ dtype and device as the corresponding ``i``\th input. Example:
@@ -839,28 +1020,20 @@
func (function): a Python function that takes Tensor inputs and returns a Tensor with a single element. inputs (tuple of Tensors or Tensor): inputs to the function ``func``.
- v (tuple of Tensors or Tensor): The vector for which the vector Hessian product is computed. Must be the
- same size as the input of ``func``. This argument is optional when
- ``func``'s input contains a single element and (if it is not provided) will be set as a Tensor
- containing a single ``1``.
- create_graph (bool, optional): If ``True``, both the output and result will be
- computed in a differentiable way. Note that when ``strict`` is ``False``, the result can not
- require gradients or be disconnected from the inputs.
+ v (tuple of Tensors or Tensor): The vector for which the vector Hessian
+ product is computed. Must be the same size as the input of
+ ``func``. This argument is optional when ``func``'s input contains
+ a single element and (if it is not provided) will be set as a
+ Tensor containing a single ``1``.
+ create_graph (bool, optional): If ``True``, both the output and result
+ will be computed in a differentiable way. Note that when ``strict``
+ is ``False``, the result can not require gradients or be
+ disconnected from the inputs. Defaults to ``False``.
- strict (bool, optional): If ``True``, an error will be raised when we detect that there exists an input
- such that all the outputs are independent of it. If ``False``, we return a Tensor of zeros as the
+ strict (bool, optional): If ``True``, an error will be raised when we
+ detect that there exists an input such that all the outputs are
+ independent of it. If ``False``, we return a Tensor of zeros as the vhp for said inputs, which is the expected mathematical value. Defaults to ``False``. Returns:
- func_output (tuple of Tensors or Tensor): output of ``func(inputs)``
- vhp (tuple of Tensors or Tensor): result of the dot product with the same shape
- as the inputs.
- Example::
+ output (tuple): tuple with:
+ func_output (tuple of Tensors or Tensor): output of ``func(inputs)``
+
+ vhp (tuple of Tensors or Tensor): result of the dot product with the
+ same shape as the inputs.
+
+ Example:
+
>>> def pow_reducer(x): ... return x.pow(3).sum() >>> inputs = torch.rand(2, 2) >>> v = torch.ones(2, 2) >>> vhp(pow_reducer, inputs, v)
- (tensor(0.5591),
- tensor([[1.0689, 1.2431],
- [3.0989, 4.4456]]))
+ (tensor(0.5591),
+ tensor([[1.0689, 1.2431],
+ [3.0989, 4.4456]])) >>> vhp(pow_reducer, inputs, v, create_graph=True) (tensor(0.5591, grad_fn=<SumBackward0>), tensor([[1.0689, 1.2431],
@@ -1009,7 +1189,9 @@
Source code for torch.autograd.functional
hvp for said inputs, which is the expected mathematical value. Defaults to ``False``. Returns:
- func_output (tuple of Tensors or Tensor): output of ``func(inputs)``
+ output (tuple): tuple with:
+ func_output (tuple of Tensors or Tensor): output of ``func(inputs)``
+
hvp (tuple of Tensors or Tensor): result of the dot product with the same shape as the inputs.
@@ -1307,10 +1489,6 @@
-importtorch
+importsys
+importtorchimportfunctoolsimportinspect
+fromtypingimportAny,Callable,TypeVar,cast
+
+
+__all__=['no_grad','enable_grad','set_grad_enabled']
+
+
+# Used for annotating the decorator usage of 'no_grad' and 'enable_grad'.
+# See https://mypy.readthedocs.io/en/latest/generics.html#declaring-decorators
+FuncType=Callable[...,Any]
+F=TypeVar('F',bound=FuncType)
+
class_DecoratorContextManager:"""Allow a context manager to be used as a decorator"""
- def__call__(self,func):
+ def__call__(self,func:F)->F:ifinspect.isgeneratorfunction(func):returnself._wrap_generator(func)@functools.wraps(func)defdecorate_context(*args,**kwargs):
- withself:
+ withself.__class__():returnfunc(*args,**kwargs)
- returndecorate_context
+ returncast(F,decorate_context)def_wrap_generator(self,func):"""Wrap each generator invocation with the context manager"""@functools.wraps(func)defgenerator_context(*args,**kwargs):gen=func(*args,**kwargs)
- whileTrue:
- try:
- withself:
- x=next(gen)
- yieldx
- exceptStopIteration:
- break
+
+ # Generators are suspended and unsuspended at `yield`, hence we
+ # make sure the grad mode is properly set every time the execution
+ # flow returns into the wrapped generator and restored when it
+ # returns through our `yield` to our caller (see PR #49017).
+ cls=type(self)
+ try:
+ # Issuing `None` to a generator fires it up
+ withcls():
+ response=gen.send(None)
+
+ whileTrue:
+ try:
+ # Forward the response to our caller and get its next request
+ request=yieldresponse
+
+ exceptGeneratorExit:
+ # Inform the still active generator about its imminent closure
+ withcls():
+ gen.close()
+ raise
+
+ exceptBaseException:
+ # Propagate the exception thrown at us by the caller
+ withcls():
+ response=gen.throw(*sys.exc_info())
+
+ else:
+ # Pass the last request to the generator and get its response
+ withcls():
+ response=gen.send(request)
+
+ # We let the exceptions raised above by the generator's `.throw` or
+ # `.send` methods bubble up to our caller, except for StopIteration
+ exceptStopIterationase:
+ # The generator informed us that it is done: take whatever its
+ # returned value (if any) was and indicate that we're done too
+ # by returning it (see docs for python's return-statement).
+ returne.value
+
returngenerator_context
+ def__enter__(self)->None:
+ raiseNotImplementedError
+
+ def__exit__(self,exc_type:Any,exc_value:Any,traceback:Any)->None:
+ raiseNotImplementedError
+
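The rewritten ``_wrap_generator`` re-enters the grad-mode context every time execution returns into a decorated generator; a hedged sketch of the behavior it preserves:

```python
import torch

@torch.no_grad()
def produce(x):
    # Each resumption of this generator runs with grad disabled,
    # even if the consumer iterates inside an enable_grad block.
    yield x * 2
    yield x * 3

x = torch.ones(2, requires_grad=True)
with torch.enable_grad():
    for out in produce(x):
        print(out.requires_grad)  # False for both yielded tensors
```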
-
[docs]classno_grad(_DecoratorContextManager):
+classno_grad(_DecoratorContextManager):r"""Context-manager that disabled gradient calculation. Disabling gradient calculation is useful for inference, when you are sure
@@ -386,8 +474,6 @@
Source code for torch.autograd.grad_mode
In this mode, the result of every computation will have `requires_grad=False`, even when the inputs have `requires_grad=True`.
- This mode has no effect when using :class:`~enable_grad` context manager .
-
This context manager is thread local; it will not affect computation in other threads.
@@ -408,15 +494,20 @@
[docs]classenable_grad(_DecoratorContextManager):r"""Context-manager that enables gradient calculation. Enables gradient calculation, if it has been disabled via :class:`~no_grad`
@@ -447,27 +538,24 @@
[docs]classset_grad_enabled(object):
+classset_grad_enabled(object):r"""Context-manager that sets gradient calculation to on or off. ``set_grad_enabled`` will enable or disable grads based on its argument :attr:`mode`. It can be used as a context-manager or as a function.
- When using :class:`~enable_grad` context manager, :class:`~set_grad_enabled(False)`
- has no effect.
-
This context manager is thread local; it will not affect computation in other threads.
- Arguments:
+ Args: mode (bool): Flag whether to enable grad (``True``), or disable (``False``). This can be used to conditionally enable gradients.
@@ -492,15 +580,15 @@
defmake_jacobian(input,num_out):
- ifisinstance(input,torch.Tensor):
+ ifis_tensor_like(input):ifnotinput.is_floating_point()andnotinput.is_complex():returnNoneifnotinput.requires_grad:returnNone
- returntorch.zeros(input.nelement(),num_out,dtype=input.dtype)
+ returninput.new_zeros((input.nelement(),num_out),dtype=input.dtype,layout=torch.strided)elifisinstance(input,container_abcs.Iterable)andnotisinstance(input,str):jacobians=list(filter(lambdax:xisnotNone,(make_jacobian(elem,num_out)forelemininput)))ifnotjacobians:returnNone
- returntype(input)(jacobians)
+ returntype(input)(jacobians)# type: ignoreelse:returnNone
-defiter_tensors(x,only_requiring_grad=False):
- ifisinstance(x,torch.Tensor):
- ifx.requires_gradornotonly_requiring_grad:
- yieldx
+defiter_tensors(x:Union[torch.Tensor,Iterable[torch.Tensor]],only_requiring_grad:bool=False)->Iterable[torch.Tensor]:
+ ifis_tensor_like(x):
+ # mypy doesn't narrow type of `x` to torch.Tensor
+ ifx.requires_gradornotonly_requiring_grad:# type: ignore
+ yieldx# type: ignoreelifisinstance(x,container_abcs.Iterable)andnotisinstance(x,str):foreleminx:forresultiniter_tensors(elem,only_requiring_grad):yieldresult
-def get_numerical_jacobian(fn, input, target=None, eps=1e-3):
+def get_numerical_jacobian(fn, input, target=None, eps=1e-3, grad_out=1.0):
     """
     input: input to `fn`
     target: the Tensors wrt whom Jacobians are calculated (default=`input`)
+    grad_out: grad output value used to calculate gradients.

     Note that `target` may not even be part of `input` to `fn`, so please be
     **very careful** in this to not clone `target`.
@@ -407,11 +449,55 @@ Source code for torch.autograd.gradcheck
     x_tensors = iter_tensors(target, True)
     j_tensors = iter_tensors(jacobian)

+    def update_jacobians(x, idx, d, d_idx, is_mkldnn=False):
+
+        # compute_jacobian only works for pure real
+        # or pure imaginary delta
+        def compute_gradient(delta):
+            # we currently assume that the norm of delta equals eps
+            assert(delta == eps or delta == (eps * 1j))
+
+            def fn_out():
+                if not is_mkldnn:
+                    # x is a view into input and so this works
+                    return fn(input).clone()
+                else:
+                    # convert the dense tensor back to have mkldnn layout
+                    return fn([x.to_mkldnn()])
+
+            orig = x[idx].item()
+            x[idx] = orig - delta
+            outa = fn_out()
+            x[idx] = orig + delta
+            outb = fn_out()
+            x[idx] = orig
+            r = (outb - outa) / (2 * eps)
+            return r.detach().reshape(-1)
+
+        # for details on the algorithm used here, refer:
+        # Section 3.5.3 https://arxiv.org/pdf/1701.00392.pdf
+        # s = fn(z) where z = x for real valued input
+        # and z = x + yj for complex valued input
+        ds_dx = compute_gradient(eps)
+        if x.is_complex():  # C -> C, C -> R
+            ds_dy = compute_gradient(eps * 1j)
+            # conjugate wirtinger derivative
+            conj_w_d = 0.5 * (ds_dx + ds_dy * 1j)
+            # wirtinger derivative
+            w_d = 0.5 * (ds_dx - ds_dy * 1j)
+            d[d_idx] = grad_out.conjugate() * conj_w_d + grad_out * w_d.conj()
+        elif ds_dx.is_complex():  # R -> C
+            # w_d = conj_w_d = 0.5 * ds_dx
+            # dL_dz_conj = 0.5 * [grad_out.conj() * ds_dx + grad_out * ds_dx.conj()]
+            #            = 0.5 * [grad_out.conj() * ds_dx + (grad_out.conj() * ds_dx).conj()]
+            #            = 0.5 * 2 * real(grad_out.conj() * ds_dx)
+            #            = real(grad_out.conj() * ds_dx)
+            d[d_idx] = torch.real(grad_out.conjugate() * ds_dx)
+        else:   # R -> R
+            d[d_idx] = ds_dx * grad_out
+
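Reading aid for the hunk above (not part of the diff): with `s = f(z)`, `z = x + iy`, `g` = `grad_out`, and `ds_dx`, `ds_dy` the two central-difference estimates computed by `compute_gradient`, the quantities being assembled are the Wirtinger derivatives referenced in Section 3.5.3 of the cited paper:

```latex
\frac{\partial s}{\partial z} = \frac{1}{2}\Big(\frac{\partial s}{\partial x} - i\,\frac{\partial s}{\partial y}\Big),
\qquad
\frac{\partial s}{\partial \bar{z}} = \frac{1}{2}\Big(\frac{\partial s}{\partial x} + i\,\frac{\partial s}{\partial y}\Big),
\qquad
d = \bar{g}\,\frac{\partial s}{\partial \bar{z}} + g\,\overline{\left(\frac{\partial s}{\partial z}\right)}
```

which is exactly the `conj_w_d` / `w_d` combination written into `d[d_idx]` in the C -> C branch.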
     # TODO: compare structure
     for x_tensor, d_tensor in zip(x_tensors, j_tensors):
-        is_complex = x_tensor.dtype.is_complex
-        if is_complex:
-            eps *= (1 + 1j)
         if x_tensor.is_sparse:
             def get_stride(size):
                 dim = len(size)
@@ -436,15 +522,8 @@ Source code for torch.autograd.gradcheck
             for x_idx in product(*[range(m) for m in x_values.size()[1:]]):
                 indices = x_indices[i].tolist() + list(x_idx)
                 d_idx = sum(indices[k] * x_stride[k] for k in range(len(x_size)))
-                orig = x_value[x_idx].item()
-                x_value[x_idx] = orig - eps
-                outa = fn(input).clone()
-                x_value[x_idx] = orig + eps
-                outb = fn(input).clone()
-                x_value[x_idx] = orig
-                r = (outb - outa) / (2 * eps)
-                d_tensor[d_idx] = r.detach().reshape(-1)
-        elif x_tensor.layout == torch._mkldnn:
+                update_jacobians(x_value, x_idx, d_tensor, d_idx)
+        elif x_tensor.layout == torch._mkldnn:  # type: ignore
             # Use .data here to get around the version check
             x_tensor = x_tensor.data
             if len(input) != 1:
@@ -454,41 +533,23 @@ Source code for torch.autograd.gradcheck
                 # this is really inefficient, but without indexing implemented, there's
                 # not really a better way than converting back and forth
                 x_tensor_dense = x_tensor.to_dense()
-                orig = x_tensor_dense[x_idx].item()
-
-                x_tensor_dense[x_idx] = orig - eps
-                x_tensor_mkl = x_tensor_dense.to_mkldnn()
-                outa = fn([x_tensor_mkl])
-
-                x_tensor_dense[x_idx] = orig + eps
-                x_tensor_mkl = x_tensor_dense.to_mkldnn()
-                outb = fn([x_tensor_mkl])
-
-                r = (outb - outa) / (2 * eps)
-                d_tensor[d_idx] = r.detach().reshape(-1)
+                update_jacobians(x_tensor_dense, x_idx, d_tensor, d_idx, is_mkldnn=True)
         else:
             # Use .data here to get around the version check
             x_tensor = x_tensor.data
             for d_idx, x_idx in enumerate(product(*[range(m) for m in x_tensor.size()])):
-                orig = x_tensor[x_idx].item()
-                x_tensor[x_idx] = orig - eps
-                outa = fn(input).clone()
-                x_tensor[x_idx] = orig + eps
-                outb = fn(input).clone()
-                x_tensor[x_idx] = orig
-                r = (outb - outa) / (2 * eps)
-                d_tensor[d_idx] = r.detach().reshape(-1)
+                update_jacobians(x_tensor, x_idx, d_tensor, d_idx)

     return jacobian
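All of the deleted per-branch loops above reduce to the same symmetric finite-difference estimate that `compute_gradient` now centralizes. A standalone sketch of that estimate (illustrative only, not the library code; `central_difference` is a hypothetical helper name):

```python
import torch

def central_difference(fn, x, idx, eps=1e-3):
    # Symmetric difference quotient for one flattened entry of x,
    # mirroring what compute_gradient above does per Jacobian column.
    flat = x.detach().clone().view(-1)

    def eval_at(v):
        old = flat[idx].item()
        flat[idx] = v
        out = fn(flat.view_as(x)).clone()
        flat[idx] = old
        return out

    orig = flat[idx].item()
    outa = eval_at(orig - eps)
    outb = eval_at(orig + eps)
    return (outb - outa) / (2 * eps)

x = torch.randn(3, dtype=torch.double)
approx = central_difference(torch.sin, x, idx=0)
# d sin(x)/dx = cos(x); only entry 0 of the output depends on x[0]
print(torch.allclose(approx[0], torch.cos(x[0]), atol=1e-6))  # True
```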
-def get_analytical_jacobian(input, output, nondet_tol=0.0):
+def get_analytical_jacobian(input, output, nondet_tol=0.0, grad_out=1.0):
     # it is easier to call to_dense() on the sparse output than
     # to modify analytical jacobian
     if output.is_sparse:
         raise ValueError('Sparse output is not supported at gradcheck yet. '
                          'Please call to_dense() on the output of fn for gradcheck.')
-    if output.layout == torch._mkldnn:
+    if output.layout == torch._mkldnn:  # type: ignore
         raise ValueError('MKLDNN output is not supported at gradcheck yet. '
                          'Please call to_dense() on the output of fn for gradcheck.')
     diff_input_list = list(iter_tensors(input, True))
@@ -498,16 +559,19 @@
             if jacobian_x.numel() != 0 and (jacobian_x - jacobian_reentrant_x).abs().max() > nondet_tol:
                 reentrant = False

-    return jacobian, reentrant, correct_grad_sizes
+    return jacobian, reentrant, correct_grad_sizes, correct_grad_types
+
+FAILED_BATCHED_GRAD_MSG = """
+gradcheck or gradgradcheck failed while testing batched gradient computation.
+This could have been invoked in a number of ways (via a test that calls
+gradcheck/gradgradcheck directly or via an autogenerated test).
+
+If you are adding a new operator, please file an issue and then use one of the
+workarounds. The workaround depends on how your test invokes gradcheck/gradgradcheck.
+If the test
+- manually invokes gradcheck/gradgradcheck, then call gradcheck/gradgradcheck
+ with `check_batched_grad=False` as a keyword argument.
+- is OpInfo-based (e.g., in test_ops.py), then modify the OpInfo for the test
+ to have `check_batched_grad=False` and/or `check_batched_gradgrad=False`.
+- is common_method_invocations-based, then add your test to the denylist
+ EXCLUDE_BATCHED_GRAD_TESTS in test_autograd.py
+
+If you're modifying an existing operator that supports batched grad computation,
+or wish to make a new operator work with batched grad computation, please read
+the following.
+
+To compute batched grads (e.g., jacobians, hessians), we vmap over the backward
+computation. The most common failure case is if there is a 'vmap-incompatible
+operation' in the backward pass. Please see
+NOTE: [How to write vmap-compatible backward formulas]
+in the codebase for an explanation of how to fix this.
+""".strip()
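The first workaround in the message looks roughly like this in a hand-written test (sketch only; the tensor and the op under test are placeholders):

```python
import torch
from torch.autograd import gradcheck

inp = torch.randn(2, 3, dtype=torch.double, requires_grad=True)

# Opt this one check out of batched-gradient testing while the operator's
# backward formula is still vmap-incompatible.
assert gradcheck(torch.tanh, (inp,), check_batched_grad=False)
```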
+
+def get_failed_batched_grad_test_msg(output_idx, input_idx, res, exp):
+    return f"""
+For output {output_idx} and input {input_idx}:
+
+{FAILED_BATCHED_GRAD_MSG}
+
+Got:
+{res}
+
+Expected:
+{exp}
+""".strip()
+
+def test_batched_grad(fail_test, input, output, output_idx):
+    diff_input_list = list(iter_tensors(input, True))
+    grad = functools.partial(torch.autograd.grad, output, diff_input_list, retain_graph=True, allow_unused=True)
+
+    def vjp(v):
+        results = grad(v)
+        results = tuple(grad if grad is not None else
+                        torch.zeros([], dtype=inp.dtype, device=inp.device).expand(inp.shape)
+                        for grad, inp in zip(results, diff_input_list))
+        return results
+
+    grad_outputs = [torch.randn_like(output) for _ in range(2)]
+
+    expected = [vjp(gO) for gO in grad_outputs]
+    expected = [torch.stack(shards) for shards in zip(*expected)]
+
+    # Squash warnings since these are expected to happen in most cases
+    # NB: this doesn't work for CUDA tests: https://github.com/pytorch/pytorch/issues/50209
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", message="Batching rule not implemented")
+        warnings.filterwarnings("ignore", message="torch.vmap is an experimental prototype")
+        try:
+            result = vmap(vjp)(torch.stack(grad_outputs))
+        except RuntimeError as ex:
+            # It's OK that we're not raising the error at the correct callsite.
+            # That's because the callsite is always going to be inside the Python
+            # autograd.grad instead of the C++ traceback of what line in the
+            # backward formula
+            return fail_test(
+                f'While computing batched gradients, got: {ex}\n\n{FAILED_BATCHED_GRAD_MSG}')
+
+    for input_idx, (res, exp) in enumerate(zip(result, expected)):
+        if torch.allclose(res, exp):
+            continue
+        return fail_test(get_failed_batched_grad_test_msg(output_idx, input_idx, res, exp))

 def _as_tuple(x):
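Stripped to its essence, `test_batched_grad` asserts that one batched VJP over stacked grad outputs matches several individual VJPs. A minimal sketch of the "expected" (unbatched) side using only the stable `torch.autograd.grad` API; the batched side relies on the prototype vmap and is omitted here:

```python
import torch

x = torch.randn(3, dtype=torch.double, requires_grad=True)
out = torch.sin(x)

def vjp(v):
    # One vector-Jacobian product, as in the helper above
    (g,) = torch.autograd.grad(out, (x,), v, retain_graph=True)
    return g

grad_outputs = [torch.randn_like(out) for _ in range(2)]
expected = torch.stack([vjp(g) for g in grad_outputs])
# test_batched_grad checks that vmap(vjp)(torch.stack(grad_outputs))
# reproduces this stacked result in a single call.
print(expected.shape)  # torch.Size([2, 3])
```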
@@ -554,7 +693,9 @@ Source code for torch.autograd.gradcheck
     raise_exception: bool = True,
     check_sparse_nnz: bool = False,
     nondet_tol: float = 0.0,
-    check_undefined_grad: bool = True
+    check_undefined_grad: bool = True,
+    check_grad_dtypes: bool = False,
+    check_batched_grad: bool = False,
 ) -> bool:
     r"""Check gradients computed via small finite differences against analytical
     gradients w.r.t. tensors in :attr:`inputs` that are of floating point or complex type
@@ -562,6 +703,13 @@ Source code for torch.autograd.gradcheck
The check between numerical and analytical gradients uses :func:`~torch.allclose`.
+ For complex functions, no notion of Jacobian exists. Gradcheck verifies if the numerical and
+ analytical values of Wirtinger and Conjugate Wirtinger derivative are consistent. The gradient
+ computation is done under the assumption that the overall function has a real valued output.
+ For functions with complex output, gradcheck compares the numerical and analytical gradients
+ for two values of :attr:`grad_output`: 1 and 1j. For more details, check out
+ :ref:`complex_autograd-doc`.
+
.. note:: The default values are designed for :attr:`input` of double precision. This check will likely fail if :attr:`input` is of less precision, e.g.,
@@ -589,8 +737,10 @@ Source code for torch.autograd.gradcheck
         nondet_tol (float, optional): tolerance for non-determinism. When running
             identical inputs through the differentiation, the results must either match
             exactly (default, 0.0) or be within this tolerance.
-        check_undefined_grad (bool, options): if True, check if undefined output grads
-            are supported and treated as zeros
+        check_undefined_grad (bool, optional): if True, check if undefined output grads
+            are supported and treated as zeros, for ``Tensor`` outputs.
+        check_batched_grad (bool, optional): if True, check if we can compute
+            batched gradients using prototype vmap support. Defaults to False.

     Returns:
         True if all differences satisfy allclose condition
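A minimal usage sketch covering both the double-precision advice and the complex path described above (the ops under test are arbitrary placeholders; any op with complex autograd support in this release would do):

```python
import torch
from torch.autograd import gradcheck

# Real inputs: double precision keeps the finite-difference comparison stable
x = torch.randn(4, dtype=torch.double, requires_grad=True)
gradcheck(torch.sigmoid, (x,), eps=1e-6, atol=1e-4)

# Complex inputs: gradcheck probes grad_output = 1 and 1j and compares
# Wirtinger / conjugate-Wirtinger derivatives, as noted above
z = torch.randn(4, dtype=torch.complex128, requires_grad=True)
gradcheck(lambda t: (t * t.conj()).real, (z,), eps=1e-6, atol=1e-4)
```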
@@ -601,29 +751,29 @@ Source code for torch.autograd.gradcheck
         return False

     tupled_inputs = _as_tuple(inputs)
-    if any(t.is_sparse for t in tupled_inputs if isinstance(t, torch.Tensor)) and not check_sparse_nnz:
+    if not check_sparse_nnz and any(t.is_sparse for t in tupled_inputs if isinstance(t, torch.Tensor)):
         return fail_test('gradcheck expects all tensor inputs are dense when check_sparse_nnz is set to False.')

     # Make sure that gradients are saved for at least one input
     any_input_requiring_grad = False
     for idx, inp in enumerate(tupled_inputs):
-        if isinstance(inp, torch.Tensor) and inp.requires_grad:
+        if is_tensor_like(inp) and inp.requires_grad:
             if not (inp.dtype == torch.float64 or inp.dtype == torch.complex128):
                 warnings.warn(
-                    'The {}th input requires gradient and '
+                    f'Input #{idx} requires gradient and '
                     'is not a double precision floating point or complex. '
                     'This check will likely fail if all the inputs are '
                     'not of double precision floating point or complex. ')
             content = inp._values() if inp.is_sparse else inp
             # TODO: To cover more problematic cases, replace stride = 0 check with
             # "any overlap in memory" once we have a proper function to check it.
-            if content.layout is not torch._mkldnn and \
-               not all(st > 0 or sz <= 1 for st, sz in zip(content.stride(), content.size())):
-                raise RuntimeError(
-                    'The {}th input has a dimension with stride 0. gradcheck only '
-                    'supports inputs that are non-overlapping to be able to '
-                    'compute the numerical gradients correctly. You should call '
-                    '.contiguous on the input before passing it to gradcheck.')
+            if content.layout is not torch._mkldnn:  # type: ignore
+                if not all(st > 0 or sz <= 1 for st, sz in zip(content.stride(), content.size())):
+                    raise RuntimeError(
+                        'The {}th input has a dimension with stride 0. gradcheck only '
+                        'supports inputs that are non-overlapping to be able to '
+                        'compute the numerical gradients correctly. You should call '
+                        '.contiguous on the input before passing it to gradcheck.')
             any_input_requiring_grad = True
             inp.retain_grad()
     if not any_input_requiring_grad:
@@ -651,51 +801,113 @@ Source code for torch.autograd.gradcheck
         def fn(input):
             return _as_tuple(func(*input))[i]

-        analytical, reentrant, correct_grad_sizes = get_analytical_jacobian(tupled_inputs, o, nondet_tol=nondet_tol)
+        analytical, reentrant, correct_grad_sizes, correct_grad_types = get_analytical_jacobian(tupled_inputs,
+                                                                                                o,
+                                                                                                nondet_tol=nondet_tol)
         numerical = get_numerical_jacobian(fn, tupled_inputs, eps=eps)
+        out_is_complex = o.is_complex()
+
+        if out_is_complex:
+            # analytical vjp with grad_out = 1.0j
+            analytical_with_imag_grad_out, reentrant_with_imag_grad_out, \
+                correct_grad_sizes_with_imag_grad_out, correct_grad_types_with_imag_grad_out \
+                = get_analytical_jacobian(tupled_inputs, o, nondet_tol=nondet_tol, grad_out=1j)
+            numerical_with_imag_grad_out = get_numerical_jacobian(fn, tupled_inputs, eps=eps, grad_out=1j)
+
+        if not correct_grad_types and check_grad_dtypes:
+            return fail_test('Gradient has dtype mismatch')
+
+        if out_is_complex and not correct_grad_types_with_imag_grad_out and check_grad_dtypes:
+            return fail_test('Gradient (calculated using complex valued grad output) has dtype mismatch')
+
         if not correct_grad_sizes:
             return fail_test('Analytical gradient has incorrect size')
-        for j, (a, n) in enumerate(zip(analytical, numerical)):
+        if out_is_complex and not correct_grad_sizes_with_imag_grad_out:
+            return fail_test('Analytical gradient (calculated using complex valued grad output) has incorrect size')
+
+        def checkIfNumericalAnalyticAreClose(a, n, j, error_str=''):
+            if not torch.allclose(a, n, rtol, atol):
+                return fail_test(error_str + 'Jacobian mismatch for output %d with respect to input %d,\n'
+                                 'numerical:%s\nanalytical:%s\n' % (i, j, n, a))
+
+        inp_tensors = iter_tensors(tupled_inputs, True)
+
+        for j, (a, n, inp) in enumerate(zip(analytical, numerical, inp_tensors)):
             if a.numel() != 0 or n.numel() != 0:
-                if not torch.allclose(a, n, rtol, atol):
-                    return fail_test('Jacobian mismatch for output %d with respect to input %d,\n'
-                                     'numerical:%s\nanalytical:%s\n' % (i, j, n, a))
+                if o.is_complex():
+                    # C -> C, R -> C
+                    a_with_imag_grad_out = analytical_with_imag_grad_out[j]
+                    n_with_imag_grad_out = numerical_with_imag_grad_out[j]
+                    checkIfNumericalAnalyticAreClose(a_with_imag_grad_out, n_with_imag_grad_out, j,
+                                                     "Gradients failed to compare equal for grad output = 1j. ")
+                if inp.is_complex():
+                    # C -> R, C -> C
+                    checkIfNumericalAnalyticAreClose(a, n, j,
+                                                     "Gradients failed to compare equal for grad output = 1. ")
+                else:
+                    # R -> R, R -> C
+                    checkIfNumericalAnalyticAreClose(a, n, j)
+
+
+        def not_reentrant_error(error_str=''):
+            error_msg = "Backward" + error_str + " is not reentrant, i.e., running backward with same \
+                        input and grad_output multiple times gives different values, \
+                        although analytical gradient matches numerical gradient. \
+                        The tolerance for nondeterminism was {}.".format(nondet_tol)
+            return fail_test(error_msg)

         if not reentrant:
-            return fail_test('Backward is not reentrant, i.e., running backward with same '
-                             'input and grad_output multiple times gives different values, '
-                             'although analytical gradient matches numerical gradient. '
-                             'The tolerance for nondeterminism was {}.'.format(nondet_tol))
+            return not_reentrant_error()
+
+        if out_is_complex and not reentrant_with_imag_grad_out:
+            return not_reentrant_error(' (calculated using complex valued grad output)')
+
+        if check_batched_grad:
+            assert reentrant, ('Batched gradient checking makes the assumption that '
+                               'backward is reentrant. This assertion should never '
+                               'be triggered: we expect gradcheck to have early '
+                               'exited before reaching this point if backward is '
+                               'not reentrant. Please file us a bug report.')
+            # NB: test_batched_grad compares two autograd.grad invocations with a single
+            # vmap(autograd.grad) invocation. It's not exactly a "gradcheck" in the
+            # sense that we're not comparing an analytical jacobian with a numeric one,
+            # but it is morally similar (we could have computed a full analytic jac
+            # via vmap, but that is potentially slow)
+            test_batched_grad(fail_test, tupled_inputs, o, j)

     # check if the backward multiplies by grad_output
     output = _differentiable_outputs(func(*tupled_inputs))
     if any([o.requires_grad for o in output]):
-        diff_input_list = list(iter_tensors(tupled_inputs, True))
+        diff_input_list: List[torch.Tensor] = list(iter_tensors(tupled_inputs, True))
         if not diff_input_list:
             raise RuntimeError("no Tensors requiring grad found in input")
         grads_input = torch.autograd.grad(output, diff_input_list,
                                           [torch.zeros_like(o, memory_format=torch.legacy_contiguous_format) for o in output],
                                           allow_unused=True)
-        for gi, i in zip(grads_input, diff_input_list):
+        for gi, di in zip(grads_input, diff_input_list):
             if gi is None:
                 continue
             if isinstance(gi, torch.Tensor) and gi.layout != torch.strided:
-                if gi.layout != i.layout:
-                    return fail_test('grad is incorrect layout (' + str(gi.layout) + ' is not ' + str(i.layout) + ')')
+                if gi.layout != di.layout:
+                    return fail_test('grad is incorrect layout (' + str(gi.layout) + ' is not ' + str(di.layout) + ')')
                 if gi.layout == torch.sparse_coo:
-                    if gi.sparse_dim() != i.sparse_dim():
+                    if gi.sparse_dim() != di.sparse_dim():
                         return fail_test('grad is sparse tensor, but has incorrect sparse_dim')
-                    if gi.dense_dim() != i.dense_dim():
+                    if gi.dense_dim() != di.dense_dim():
                         return fail_test('grad is sparse tensor, but has incorrect dense_dim')
                 gi = gi.to_dense()
-                i = i.to_dense()
-            if not gi.eq(0).all():
+                di = di.to_dense()
+
+            if check_sparse_nnz:
+                if not torch.allclose(gi, torch.zeros_like(gi)):
+                    return fail_test('backward not multiplied by grad_output')
+            elif not gi.eq(0).all():
                 return fail_test('backward not multiplied by grad_output')
-            if gi.dtype != i.dtype or gi.device != i.device or gi.is_sparse != i.is_sparse:
+            if gi.dtype != di.dtype or gi.device != di.device or gi.is_sparse != di.is_sparse:
                 return fail_test("grad is incorrect type")
-            if gi.size() != i.size():
+            if gi.size() != di.size():
                 return fail_test('grad is incorrect size')

     if check_undefined_grad:
@@ -730,7 +942,11 @@ Source code for torch.autograd.gradcheck
             return True

         # All backward functions must work properly if all output grads are undefined
-        outputs_to_check = [[torch._C._functions.UndefinedGrad()(o) for o in _differentiable_outputs(func(*tupled_inputs))]]
+        outputs_to_check = [[
+            torch._C._functions.UndefinedGrad()(o) for o in _differentiable_outputs(func(*tupled_inputs))
+            # This check filters out Tensor-likes that aren't instances of Tensor.
+            if isinstance(o, torch.Tensor)
+        ]]

         # If there are multiple output grads, we should be able to undef one at a time without error
         if len(outputs_to_check[0]) > 1:
@@ -757,7 +973,9 @@ Source code for torch.autograd.gradcheck
     gen_non_contig_grad_outputs: bool = False,
     raise_exception: bool = True,
     nondet_tol: float = 0.0,
-    check_undefined_grad: bool = True
+    check_undefined_grad: bool = True,
+    check_grad_dtypes: bool = False,
+    check_batched_grad: bool = False,
 ) -> bool:
     r"""Check gradients of gradients computed via small finite differences
     against analytical gradients w.r.t. tensors in :attr:`inputs` and
@@ -802,8 +1020,10 @@ Source code for torch.autograd.gradcheck
             exactly (default, 0.0) or be within this tolerance. Note that a small amount
             of nondeterminism in the gradient will lead to larger inaccuracies in
             the second derivative.
-        check_undefined_grad (bool, options): if True, check if undefined output grads
+        check_undefined_grad (bool, optional): if True, check if undefined output grads
             are supported and treated as zeros
+        check_batched_grad (bool, optional): if True, check if we can compute
+            batched gradients using prototype vmap support. Defaults to False.

     Returns:
         True if all differences satisfy allclose condition
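A short illustration of the second-order check these arguments configure (sketch; the op and shapes are placeholders):

```python
import torch
from torch.autograd import gradgradcheck

x = torch.randn(3, dtype=torch.double, requires_grad=True)
grad_out = torch.randn(3, dtype=torch.double, requires_grad=True)

# Verifies the derivative of exp's backward against finite differences;
# batched-gradient checking stays off unless explicitly requested.
gradgradcheck(torch.exp, (x,), (grad_out,), check_batched_grad=False)
```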
@@ -834,8 +1054,10 @@
     def __init__(self, *args, **kwargs):
         use_cuda = kwargs.pop('use_cuda', True)
         profile_memory = kwargs.pop('profile_memory', False)
+        with_flops = kwargs.pop('with_flops', False)
         super(EventList, self).__init__(*args, **kwargs)
-        self._cpu_children_populated = False
         self._use_cuda = use_cuda
         self._profile_memory = profile_memory
+        self._tree_built = False
+        self._with_flops = with_flops
+
+    def _build_tree(self):
+        self._populate_cpu_children()
+        self._remove_dup_nodes()
+        self._set_backward_stacktraces()
+        self._tree_built = True

     def __str__(self):
         return self.table()
-    def populate_cpu_children(self):
+    def _remove_dup_nodes(self):
+        while True:
+            to_delete = []
+            for idx in range(len(self)):
+                if (self[idx].cpu_parent is not None and
+                        self[idx].cpu_parent.name == self[idx].name and
+                        len(self[idx].cpu_parent.cpu_children) == 1):
+                    self[idx].cpu_parent.cpu_children = self[idx].cpu_children
+                    self[idx].cpu_parent.kernels = self[idx].kernels  # lift kernels up
+                    for ch in self[idx].cpu_children:
+                        ch.cpu_parent = self[idx].cpu_parent
+                    to_delete.append(idx)
+            if len(to_delete) == 0:
+                break
+            new_evts = [ev for ind, ev in enumerate(self) if ind not in to_delete]
+            self.clear()
+            self.extend(new_evts)
+
+    def _populate_cpu_children(self):
        """Populates child events into each underlying FunctionEvent object.
        One event is a child of another if [s1, e1) is inside [s2, e2). Where
        s1 and e1 would be start and end of the child event's interval. And
@@ -391,13 +468,11 @@ Source code for torch.autograd.profiler
        If for any reason two intervals intersect only partially, this function
        will not record a parent child relationship between them.
        """
-        if self.cpu_children_populated:
-            return
        # Some events can be async (i.e. start and end on different threads),
        # since it's generally undefined how to attribute children ranges to
        # async ranges, we do not use them when calculating nested ranges and stats
-        sync_events = [evt for evt in self if not evt.is_async]
+        sync_events = [evt for evt in self if not evt.is_async and evt.device_type == DeviceType.CPU]
        events = sorted(
            sync_events,
            key=attrgetter("thread"),
@@ -422,44 +497,75 @@ Source code for torch.autograd.profiler
        # Algorithm has O(N * log(N)) complexity where N is number of
        # intervals
        for thread_id, thread_events in threads:
-            thread_events = sorted(
+            thread_events_ = sorted(
                thread_events,
-                key=lambda event: [event.cpu_interval.start, -event.cpu_interval.end],
+                key=lambda event: [event.time_range.start, -event.time_range.end],
            )
-            current_events = []
+            current_events: List[FunctionEvent] = []
            cur_end = 0
-            for event in thread_events:
+            for event in thread_events_:
                while len(current_events) > 0:
                    parent = current_events[-1]
-                    if event.cpu_interval.start >= parent.cpu_interval.end or \
-                            event.cpu_interval.end > parent.cpu_interval.end:
+                    if event.time_range.start >= parent.time_range.end or \
+                            event.time_range.end > parent.time_range.end:
                        # this can't be a parent
                        current_events.pop()
                    else:
                        parent.append_cpu_child(event)
+                        assert (
+                            event.cpu_parent is None
+                        ), "There is already a CPU parent event for {}".format(
+                            event.key
+                        )
+                        event.set_cpu_parent(parent)
                        break

                current_events.append(event)
-        self._cpu_children_populated = True
+    def _set_backward_stacktraces(self):
+        def bw_parent(evt):
+            if evt is None:
+                return None
+            elif evt.scope == 1:  # BACKWARD_FUNCTION
+                return evt
+            else:
+                return bw_parent(evt.cpu_parent)
+
+        fwd_stacks = {}
+        for evt in self:
+            if bw_parent(evt) is None and evt.stack is not None:
+                t = (evt.sequence_nr, evt.thread)
+                if t not in fwd_stacks:
+                    fwd_stacks[t] = evt.stack
+
+        for evt in self:
+            p = bw_parent(evt)
+            if p is not None:
+                assert p.fwd_thread is not None
+                t = (p.sequence_nr, p.fwd_thread)
+                if t in fwd_stacks:
+                    evt.stack = fwd_stacks[t]
+                else:
+                    evt.stack = []

    @property
    def self_cpu_time_total(self):
        return sum([event.self_cpu_time_total for event in self])
-    @property
-    def cpu_children_populated(self):
-        return self._cpu_children_populated
-
-    def table(self, sort_by=None, row_limit=100, header=None):
+    def table(self, sort_by=None, row_limit=100, max_src_column_width=75, header=None, top_level_events_only=False):
        """Prints an EventList as a nicely formatted table.

-        Arguments:
+        Args:
            sort_by (str, optional): Attribute used to sort entries. By default
                they are printed in the same order as they were registered.
                Valid keys include: ``cpu_time``, ``cuda_time``, ``cpu_time_total``,
                ``cuda_time_total``, ``cpu_memory_usage``, ``cuda_memory_usage``,
                ``self_cpu_memory_usage``, ``self_cuda_memory_usage``, ``count``.
+            top_level_events_only(bool, optional): Boolean flag to determine the
+                selection of events to display. If true, the profiler will only
+                display events at top level like top-level invocation of python
+                `lstm`, python `add` or other functions, nested events like low-level
+                cpu/cuda ops events are omitted for profiler result readability.

        Returns:
            A string containing the table.
@@ -468,16 +574,18 @@ Source code for torch.autograd.profiler
            self,
            sort_by=sort_by,
            row_limit=row_limit,
+            max_src_column_width=max_src_column_width,
            header=header,
-            use_cuda=self._use_cuda,
-            profile_memory=self._profile_memory)
+            profile_memory=self._profile_memory,
+            with_flops=self._with_flops,
+            top_level_events_only=top_level_events_only)

    def export_chrome_trace(self, path):
        """Exports an EventList as a Chrome tracing tools file.

        The checkpoint can be later loaded and inspected under ``chrome://tracing`` URL.

-        Arguments:
+        Args:
            path (str): Path where the trace will be written.
        """
        import os
@@ -488,6 +596,8 @@ Source code for torch.autograd.profiler
            # this technique is proven to give a 4x speedup.
            f.write("[")
            for evt in self:
+                if evt.trace_name is None:
+                    continue
                f.write(
                    '{"name": "%s", '
                    '"ph": "X", '
@@ -497,9 +607,9 @@
            f.truncate()
            f.write("]")
-    def key_averages(self, group_by_input_shapes=False):
+    def supported_export_stacks_metrics(self):
+        return ["self_cpu_time_total", "self_cuda_time_total"]
+
+    def export_stacks(self, path: str, metric: str):
+        if metric not in self.supported_export_stacks_metrics():
+            raise ValueError("metric should be one of: " + str(self.supported_export_stacks_metrics()))
+        translate_table = str.maketrans(" ;\t\n", "____")
+        with open(path, 'w') as f:
+            for evt in self:
+                if evt.stack and len(evt.stack) > 0:
+                    metric_value = getattr(evt, metric)
+                    if int(metric_value) > 0:
+                        stack_str = ""
+                        for entry in reversed(evt.stack):
+                            stack_str += entry.translate(translate_table)
+                            stack_str += ";"
+                        stack_str = stack_str[:-1] + " " + str(int(metric_value))
+                        f.write(stack_str + "\n")
+
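The file written by `export_stacks` above uses the "collapsed stack" convention consumed by flame-graph tools: one line per call stack, semicolon-separated frames, then a space and the metric value. A hedged usage sketch (the workload and file name are placeholders):

```python
import torch

x = torch.randn(16, 16, requires_grad=True)
with torch.autograd.profiler.profile(with_stack=True) as prof:
    torch.matmul(x, x).sum().backward()

# Each written line looks like: frame_a;frame_b;frame_c 1234
prof.export_stacks("profiler_stacks.txt", "self_cpu_time_total")
```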
+    def key_averages(self, group_by_input_shapes=False, group_by_stack_n=0):
        """Averages all function events over their keys.

-        @param group_by_input_shapes The key would become
-        (event name, input dimensions) rather than just event name.
-        This is useful to see which dimensionality contributes to the runtime
-        the most and may help with dimension specific optimizations or
-        choosing best candidates for quantization (aka fitting a roof line)
+        Args:
+            group_by_input_shapes: group entries by
+                (event name, input shapes) rather than just event name.
+                This is useful to see which input shapes contribute to the runtime
+                the most and may help with size-specific optimizations or
+                choosing the best candidates for quantization (aka fitting a roof line)
+
+            group_by_stack_n: group by top n stack trace entries

        Returns:
            An EventList containing FunctionEventAvg objects.
        """
-        self.populate_cpu_children()
-        stats = defaultdict(FunctionEventAvg)
+        assert self._tree_built
+        stats: Dict[Tuple[str, ...], FunctionEventAvg] = defaultdict(FunctionEventAvg)

-        def get_key(event, group_by_input_shapes):
-            if not group_by_input_shapes:
-                return (event.key, event.node_id)
-            return (event.key, str(event.input_shapes), event.node_id)
+        def get_key(event, group_by_input_shapes, group_by_stack_n) -> Tuple[str, ...]:
+            key = [str(event.key), str(event.node_id), str(event.device_type), str(event.is_legacy)]
+            if group_by_input_shapes:
+                key.append(str(event.input_shapes))
+            if group_by_stack_n > 0:
+                key += event.stack[:group_by_stack_n]
+            return tuple(key)

        for evt in self:
-            stats[get_key(evt, group_by_input_shapes)].add(
-                evt, group_by_input_shapes)
-        return EventList(stats.values(), use_cuda=self._use_cuda, profile_memory=self._profile_memory)
+            stats[get_key(evt, group_by_input_shapes, group_by_stack_n)].add(evt)
+
+        avg_list = EventList(
+            stats.values(),
+            use_cuda=self._use_cuda,
+            profile_memory=self._profile_memory,
+            with_flops=self._with_flops)
+        for evt in avg_list:
+            evt.stack = evt.stack[:group_by_stack_n]
+            if not group_by_input_shapes:
+                evt.input_shapes = ""
+        return avg_list

    def total_average(self):
        """Averages all events.
@@ -585,13 +729,11 @@ Source code for torch.autograd.profiler
        only report runtime of PyTorch functions.
        Note: profiler is thread local and is automatically propagated into the async tasks

-        Arguments:
+        Args:
            enabled (bool, optional): Setting this to False makes this context manager a no-op.
-                Default: ``True``.

            use_cuda (bool, optional): Enables timing of CUDA events as well using the cudaEvent API.
                Adds approximately 4us of overhead to each tensor operation.
-                Default: ``False``

            record_shapes (bool, optional): If shapes recording is set, information
                about input dimensions will be collected. This allows one to see which
@@ -604,10 +746,23 @@
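To tie the profiler changes in this section together, a short usage sketch (standard public API; the workload is a placeholder):

```python
import torch

x = torch.randn(64, 64, requires_grad=True)

with torch.autograd.profiler.profile(record_shapes=True, with_stack=True) as prof:
    torch.matmul(x, x).sum().backward()

# Aggregate identical ops, optionally split by input shape and the top two
# stack frames, then render the table implemented above.
print(prof.key_averages(group_by_input_shapes=True, group_by_stack_n=2)
          .table(sort_by="self_cpu_time_total", row_limit=10))
```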