Merge branch 'master' into public

NVIDIA · Jan 10, 2024 · 3e80b90 · 3e80b90
2 parents 79a56a9 + 8a10a31
commit 3e80b90
Show file tree

Hide file tree

Showing 275 changed files with 16,273 additions and 40,042 deletions.
diff --git a/.gitattributes b/.gitattributes
@@ -13,3 +13,7 @@
 *.grid filter=lfs diff=lfs merge=lfs -text
 *.nvdb filter=lfs diff=lfs merge=lfs -text
 *.gif filter=lfs diff=lfs merge=lfs -text
+
+# Exclude vendored code from project language stats
+warp/native/cutlass/** linguist-vendored
+tools/** linguist-vendored
diff --git a/.github/workflows/sphinx.yml b/.github/workflows/sphinx.yml
@@ -0,0 +1,30 @@
+name: Deploy Sphinx documentation to Pages
+
+# Runs on pushes targeting the github-pages branch
+on:
+  push:
+    branches: [github-pages]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/checkout@v4
+      - name: Build HTML
+        uses: ammaraskar/sphinx-action@master
+        with:
+          docs-folder: "docs/"
+          build-command: "python ../build_docs.py"
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v3
+        with:
+          name: html-docs
+          path: docs/_build/html/
+      - name: Deploy
+        uses: peaceiris/actions-gh-pages@v3
+        if: github.ref == 'refs/heads/github-pages'
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: docs/_build/html
diff --git a/.gitignore b/.gitignore
@@ -18,8 +18,7 @@ archive
 /_repo
 examples/assets/.thumbs
 /examples/tmp
-/docs/_build/doctrees
-/docs/_build/html/_static/fonts
+/docs/_build
 /warp_lang.egg-info
 exts/omni.warp/omni/warp/ogn/tests/usd
 build/lib/
@@ -29,4 +28,5 @@ exts/omni.warp/config/extension.gen.toml
 /build
 /dist
 .coverage
+.cache
 warp/tests/outputs
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -0,0 +1,24 @@
+stages:
+  - deploy
+
+pages:
+  stage: deploy
+  image: python:3.11-slim
+  before_script:
+    - echo -e "\\e[0Ksection_start:`date +%s`:my_first_section[collapsed=true]\\r\\e[0KSet up docs environment"
+    - apt-get update && apt-get install make --no-install-recommends -y
+    - python -m pip install --upgrade pip
+    - python -m pip install -r docs/requirements.txt
+    - echo -e "\\e[0Ksection_end:`date +%s`:my_first_section\\r\\e[0K"
+  script:
+    - cd docs && make clean
+    - python ../build_docs.py --no-color
+  after_script:
+    - mv docs/_build/html/ ./public/
+  artifacts:
+    paths:
+    - public
+  rules:
+    - if: $CI_COMMIT_BRANCH == "master"
+  tags:
+    - pages
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,15 +5,15 @@
 - Fix for kernel caching when function argument types change
 - Fix code-gen ordering of dependent structs
 - Fix for `wp.Mesh` build on MGPU systems
-- Fix for name clash bug with adjoint code: https://github.com/NVIDIA/warp/issues/154 
+- Fix for name clash bug with adjoint code: https://github.com/NVIDIA/warp/issues/154
 - Add `wp.frac()` for returning the fractional part of a floating point value
 - Add support for custom native CUDA snippets using `@wp.func_native` decorator
 - Add support for batched matmul with batch size > 2^16-1
-- Add support for tranposed CUTLASS `wp.matmul()` and additional error checking
+- Add support for transposed CUTLASS `wp.matmul()` and additional error checking
 - Add support for quad and hex meshes in `wp.fem`
-- Detect and warn when C++ runtime doesn't match compiler during build, e.g.: libstdc++.so.6: version `GLIBCXX_3.4.30' not found
+- Detect and warn when C++ runtime doesn't match compiler during build, e.g.: ``libstdc++.so.6: version `GLIBCXX_3.4.30' not found``
 - Documentation update for `wp.BVH`
-- Documentaiton and simplified API for runtime kernel specialization `wp.Kernel`
+- Documentation and simplified API for runtime kernel specialization `wp.Kernel`
 
 ## [1.0.0-beta.4] - 2023-11-01
 
@@ -43,7 +43,11 @@
 - Fix for `wp.func` to return a default value if function does not return on all control paths
 - Refactor `wp.fem` support for new basis functions, decoupled function spaces
 - Optimizations for `wp.noise` functions, up to 10x faster in most cases
-- Optimizations for `type_size_in_bytes()` used in array construction
+- Optimizations for `type_size_in_bytes()` used in array construction
+
+### Breaking Changes
+
+- To support grid-stride kernels, `wp.tid()` can no longer be called inside `wp.func` functions.
 
 ## [1.0.0-beta.2] - 2023-09-01
 
@@ -144,7 +148,7 @@
 ## [0.9.0] - 2023-06-01
 
 - Add support for in-place modifications to vector, matrix, and struct types inside kernels (will warn during backward pass with `wp.verbose` if using gradients)
-- Add support for step-through VSCode debugging of kernel code with standalone LLVM compiler, see `wp.breakpoint()`, and `test_debug.py`
+- Add support for step-through VSCode debugging of kernel code with standalone LLVM compiler, see `wp.breakpoint()`, and `walkthrough_debug.py`
 - Add support for default values on built-in functions
 - Add support for multi-valued `@wp.func` functions
 - Add support for `pass`, `continue`, and `break` statements
@@ -181,7 +185,7 @@
 ## [0.8.1] - 2023-04-13
 
 - Fix for regression when passing flattened numeric lists as matrix arguments to kernels
-- Fix for regressions when passing wp.struct types with uninitialized (None) member attributes
+- Fix for regressions when passing `wp.struct` types with uninitialized (`None`) member attributes
 
 ## [0.8.0] - 2023-04-05
 
@@ -215,20 +219,18 @@
 - Optimizations for `wp.launch()`, up to 3x faster launches in common cases
 - Fix `wp.randf()` conversion to float to reduce bias for uniform sampling
 - Fix capture of `wp.func` and `wp.constant` types from inside Python closures
-- Fix for CUDA on WSL 
+- Fix for CUDA on WSL
 - Fix for matrices in structs
 - Fix for transpose indexing for some non-square matrices
 - Enable Python faulthandler by default
 - Update to VS2019
 
-Breaking Changes
-----------------
+### Breaking Changes
 
 - `wp.constant` variables can now be treated as their true type, accessing the underlying value through `constant.val` is no longer supported
 - `wp.sim.model.ground_plane` is now a `wp.array` to support gradient, users should call `builder.set_ground_plane()` to create the ground 
 - `wp.sim` capsule, cones, and cylinders are now aligned with the default USD up-axis
 
-
 ## [0.7.2] - 2023-02-15
 
 - Reduce test time for vec/math types
@@ -248,9 +250,9 @@ Breaking Changes
 - Add support for slicing `wp.array` types in Python
 - Add `wp.from_ptr()` helper to construct arrays from an existing allocation
 - Add support for `break` statements in ranged-for and while loops (backward pass support currently not implemented)
-- Add built-in mathematic constants, see `wp.pi`, `wp.e`, `wp.log2e`, etc
+- Add built-in mathematical constants, see `wp.pi`, `wp.e`, `wp.log2e`, etc.
 - Add built-in conversion between degrees and radians, see `wp.degrees()`, `wp.radians()`
-- Add security pop-up for Kernel Node 
+- Add security pop-up for Kernel Node
 - Improve error handling for kernel return values
 
 ## [0.6.3] - 2023-01-31
@@ -336,7 +338,7 @@ Breaking Changes
 - Fix for hashing of `wp.constants()` not invalidating kernels
 - Fix for reload when multiple `.ptx` versions are present
 - Improved error reporting during code-gen
- 
+
 ## [0.4.3] - 2022-09-20
 
 - Update all samples to use GPU interop path by default
@@ -360,7 +362,6 @@ Breaking Changes
 - Fix for debug flags not being set correctly on CUDA when `wp.config.mode == "debug"`, this enables bounds checking on CUDA kernels in debug mode
 - Fix for code gen of functions that do not return a value
 
-
 ## [0.4.0] - 2022-08-09
 
 - Fix for FP16 conversions on GPUs without hardware support
@@ -373,18 +374,15 @@ Breaking Changes
 - Add support for cross-module `@wp.struct` references
 - Support running even if CUDA initialization failed, use `wp.is_cuda_available()` to check availability
 - Statically linking with the CUDA runtime library to avoid deployment issues
-
 
 ### Breaking Changes
 
 - Removed `wp.runtime` reference from the top-level module, as it should be considered private
 
-
 ## [0.3.2] - 2022-07-19
 
 - Remove Torch import from `__init__.py`, defer import to `wp.from_torch()`, `wp.to_torch()`
 
-
 ## [0.3.1] - 2022-07-12
 
 - Fix for marching cubes reallocation after initialization
@@ -396,15 +394,13 @@ Breaking Changes
 - Add support for using arbitrary external CUDA contexts, see `wp.map_cuda_device()`, `wp.unmap_cuda_device()`
 - Add PyTorch device aliasing functions, see `wp.device_from_torch()`, `wp.device_to_torch()`
 
-
 ### Breaking Changes
 
 - A CUDA device is used by default, if available (aligned with `wp.get_preferred_device()`)
 - `wp.ScopedCudaGuard` is deprecated, use `wp.ScopedDevice` instead
 - `wp.synchronize()` now synchronizes all devices; for finer-grained control, use `wp.synchronize_device()`
 - Device alias `"cuda"` now refers to the current CUDA context, rather than a specific device like `"cuda:0"` or `"cuda:1"`
 
-
 ## [0.3.0] - 2022-07-08
 
 - Add support for FP16 storage type, see `wp.float16`
@@ -429,7 +425,6 @@ Breaking Changes
 - Fix for reload of generated CPU kernel code on Linux
 - Fix for example scripts to output USD at 60 timecodes per-second (better Kit compatibility)
 
-
 ## [0.2.3] - 2022-06-13
 
 - Fix for incorrect 4d array bounds checking
@@ -438,7 +433,6 @@ Breaking Changes
 - Array gradients are now allocated along with the arrays and accessible as `wp.array.grad`, users should take care to always call `wp.Tape.zero()` to clear gradients between different invocations of `wp.Tape.backward()`
 - Added `wp.array.fill_()` to set all entries to a scalar value (4-byte values only currently)
 
-
 ### Breaking Changes
 
 - Tape `capture` option has been removed, users can now capture tapes inside existing CUDA graphs (e.g.: inside Torch)
@@ -471,7 +465,7 @@ Breaking Changes
 ## [0.2.1] - 2022-05-11
 
 - Fix for unit tests in Kit
-- 
+
 ## [0.2.0] - 2022-05-02
 
 ### Warp Core
@@ -509,15 +503,14 @@ Breaking Changes
 - Fix for URDF importer and floating base support
 - Add examples showing how to use differentiable forward kinematics to solve inverse kinematics
 - Add examples for URDF cartpole and quadruped simulation
- 
+
 ### Breaking Changes
 
 - `wp.volume_sample_world()` is now replaced by `wp.volume_sample_f/i/vec()` which operate in index (local) space. Users should use `wp.volume_world_to_index()` to transform points from world space to index space before sampling.
 - `wp.mlp()` expects multi-dimensional arrays instead of one-dimensional arrays for inference, all other semantics remain the same as earlier versions of this API.
 - `wp.array.length` member has been removed, please use `wp.array.shape` to access array dimensions, or use `wp.array.size` to get total element count
 - Marking `dense_gemm()`, `dense_chol()`, etc methods as experimental until we revisit them
 
-
 ## [0.1.25] - 2022-03-20
 
 - Add support for class methods to be Warp kernels
@@ -528,7 +521,6 @@ Breaking Changes
 - Add support for floored division on integer types
 - Move tests into core library so they can be run in Kit environment
 
-
 ## [0.1.24] - 2022-03-03
 
 ### Warp Core

diff --git a/build_docs.py b/build_docs.py
@@ -1,38 +1,47 @@
+# Copyright (c) 2022 NVIDIA CORPORATION.  All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
 import os
-import sys
 import subprocess
+import sys
 
-import warp as wp
-
-
-wp.init()
+from warp.context import export_functions_rst, export_stubs
 
 # docs
 
 # disable sphinx color output
 os.environ["NO_COLOR"] = "1"
 
-with open("docs/modules/functions.rst", "w") as function_ref:
-    wp.print_builtins(function_ref)
+base_path = os.path.dirname(os.path.realpath(__file__))
+
+with open(os.path.join(base_path, "docs", "modules", "functions.rst"), "w") as function_ref:
+    export_functions_rst(function_ref)
 
 # run Sphinx build
 try:
-    if os.name == 'nt':
-        subprocess.check_output("make.bat html", cwd="docs", shell=True)
+    docs_folder = os.path.join(base_path, "docs")
+    make_html_cmd = ["make.bat" if os.name == "nt" else "make", "html"]
+
+    if os.name == "nt" or len(sys.argv) == 1:
+        subprocess.check_output(make_html_cmd, cwd=docs_folder)
     else:
-        subprocess.run("make clean", cwd="docs", shell=True)
-        subprocess.check_output("make html", cwd="docs", shell=True)
+        # Sphinx options were passed via the argument list
+        make_html_cmd.append("-e")
+        sphinx_options = " ".join(sys.argv[1:])
+        subprocess.check_output(make_html_cmd, cwd=docs_folder, env=dict(os.environ, SPHINXOPTS=sphinx_options))
 except subprocess.CalledProcessError as e:
     print(e.output.decode())
     raise e
 
-
 # generate stubs for autocomplete
-stub_file = open("warp/stubs.py", "w")
-wp.export_stubs(stub_file)
-stub_file.close()
+with open(os.path.join(base_path, "warp", "stubs.py"), "w") as stub_file:
+    export_stubs(stub_file)
 
 # code formatting
-subprocess.run([sys.executable, "-m", "black", "warp/stubs.py"])
+subprocess.run([sys.executable, "-m", "black", os.path.join(base_path, "warp", "stubs.py")])
 
 print("Finished")
diff --git a/build_lib.py b/build_lib.py
@@ -1,3 +1,10 @@
+# Copyright (c) 2022 NVIDIA CORPORATION.  All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
 # This script is an 'offline' build of the core warp runtime libraries
 # designed to be executed as part of CI / developer workflows, not
 # as part of the user runtime (since it requires CUDA toolkit, etc)
@@ -49,6 +56,10 @@
 parser.add_argument("--no_build_llvm", dest="build_llvm", action="store_false")
 parser.set_defaults(build_llvm=False)
 
+parser.add_argument(
+    "--llvm_source_path", type=str, help="Path to the LLVM project source code (optional, repo cloned if not set)"
+)
+
 parser.add_argument("--debug_llvm", action="store_true", help="Enable LLVM compiler code debugging, default disabled")
 parser.add_argument("--no_debug_llvm", dest="debug_llvm", action="store_false")
 parser.set_defaults(debug_llvm=False)