
Rework doc homepage #1624

Merged · 18 commits · Feb 22, 2024
2 changes: 1 addition & 1 deletion .github/workflows/build_main_documentation.yml
@@ -142,7 +142,7 @@ jobs:
- name: Combine subpackage documentation
run: |
cd optimum
sudo python docs/combine_docs.py --subpackages graphcore habana intel neuron furiosa amd --version ${{ env.VERSION }}
sudo python docs/combine_docs.py --subpackages nvidia amd intel neuron habana furiosa graphcore --version ${{ env.VERSION }}
cd ..

- name: Push to repositories
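The order of the subpackages on this command line matters: as the combine_docs.py diff further below shows, each partner's TOC entry is inserted at index 1 of the base table of contents, so the last subpackage processed lands highest in the sidebar and the final order is the reverse of the CLI order. A minimal sketch of that behavior, using placeholder strings rather than the real TOC dicts:

# Each insert(1, ...) pushes earlier entries down one slot, so the
# resulting order is the reverse of the processing order.
base_toc = ["overview", "rest"]
for name in ["nvidia", "amd", "intel"]:
    base_toc.insert(1, name)
assert base_toc == ["overview", "intel", "amd", "nvidia", "rest"]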
15 changes: 14 additions & 1 deletion .github/workflows/build_pr_documentation.yml
@@ -64,6 +64,11 @@ jobs:
repository: 'huggingface/optimum-furiosa'
path: optimum-furiosa

- uses: actions/checkout@v2
with:
repository: 'huggingface/optimum-amd'
path: optimum-amd

- name: Setup environment
run: |
pip uninstall -y doc-builder
@@ -118,6 +123,14 @@
mv furiosa-doc-build ../optimum
cd ..

- name: Make AMD documentation
run: |
sudo docker system prune -a -f
cd optimum-amd
make doc BUILD_DIR=amd-doc-build VERSION=${{ env.VERSION }}
sudo mv amd-doc-build ../optimum
cd ..

- name: Make Optimum documentation
run: |
sudo docker system prune -a -f
@@ -128,7 +141,7 @@
- name: Combine subpackage documentation
run: |
cd optimum
sudo python docs/combine_docs.py --subpackages graphcore habana intel neuron furiosa --version pr_$PR_NUMBER
sudo python docs/combine_docs.py --subpackages nvidia amd intel neuron habana furiosa graphcore --version pr_$PR_NUMBER
sudo mv optimum-doc-build ../
cd ..

4 changes: 3 additions & 1 deletion README.md
@@ -2,6 +2,8 @@

# Hugging Face Optimum

TEST

🤗 Optimum is an extension of 🤗 Transformers and Diffusers, providing a set of optimization tools enabling maximum efficiency to train and run models on targeted hardware, while keeping things easy to use.

## Installation
@@ -39,7 +41,7 @@ python -m pip install optimum[onnxruntime]@git+https://github.com/huggingface/op

## Accelerated Inference

🤗 Optimum provides multiple tools to export and run optimized models on various ecosystems:

- [ONNX](https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/export_a_model) / [ONNX Runtime](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/models)
- TensorFlow Lite
33 changes: 32 additions & 1 deletion docs/combine_docs.py
@@ -105,6 +105,31 @@ def add_neuron_doc(base_toc: List):
)


def add_nvidia_doc(base_toc: List):
"""
Extends the table of contents with a section about Optimum Nvidia.

Args:
base_toc (List): table of contents for the doc of Optimum.
"""
# Update optimum table of contents
base_toc.insert(
1,
{
"sections": [
{
# Ideally this should directly point at https://huggingface.co/docs/optimum-nvidia/index
# Current hacky solution is to have a redirection in _redirects.yml
"local": "https://github.com/huggingface/optimum-nvidia",
"title": "🤗 Optimum Nvidia",
}
],
"title": "Nvidia",
"isExpanded": False,
},
)


def main():
args = parser.parse_args()
optimum_path = Path("optimum-doc-build")
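For a quick sanity check, here is a hypothetical snippet exercising add_nvidia_doc as defined above (the starting base_toc is made up for illustration; only the inserted entry mirrors the real code):

# Hypothetical smoke test; assumes add_nvidia_doc from combine_docs.py is in scope.
base_toc = [{"title": "Overview", "sections": [{"local": "index", "title": "🤗 Optimum"}]}]
add_nvidia_doc(base_toc)
entry = base_toc[1]
assert entry["title"] == "Nvidia" and entry["isExpanded"] is False
# The single section links out to the GitHub README instead of a local doc page.
assert entry["sections"][0]["local"] == "https://github.com/huggingface/optimum-nvidia"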
@@ -118,6 +143,9 @@ def main():
if subpackage == "neuron":
# Neuron has its own doc so it is managed differently
add_neuron_doc(base_toc)
elif subpackage == "nvidia":
# At the moment, Optimum Nvidia's doc is the README of the GitHub repo
add_nvidia_doc(base_toc)
else:
subpackage_path = Path(f"{subpackage}-doc-build")

@@ -136,7 +164,10 @@ def main():
# Extend table of contents sections with the subpackage name as the parent folder
rename_subpackage_toc(subpackage, subpackage_toc)
# Just keep the name of the partner in the TOC title
subpackage_toc[0]["title"] = subpackage_toc[0]["title"].split("Optimum ")[-1]
if subpackage == "amd":
subpackage_toc[0]["title"] = subpackage_toc[0]["title"].split("Optimum-")[-1]
else:
subpackage_toc[0]["title"] = subpackage_toc[0]["title"].split("Optimum ")[-1]
if subpackage != "graphcore":
# Update optimum table of contents
base_toc.insert(1, subpackage_toc[0])
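The AMD branch above exists because of the title spelling: assuming the optimum-amd TOC title is written "Optimum-AMD" with a hyphen (unlike the space-separated titles of the other partners), splitting on "Optimum " would return the title unchanged. A small illustration of the two splits:

# "Optimum " (with a trailing space) never occurs in a hyphenated title,
# so split(...)[-1] would hand back the whole string untouched.
assert "🤗 Optimum Habana".split("Optimum ")[-1] == "Habana"
assert "🤗 Optimum-AMD".split("Optimum ")[-1] == "🤗 Optimum-AMD"  # no match
assert "🤗 Optimum-AMD".split("Optimum-")[-1] == "AMD"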
24 changes: 12 additions & 12 deletions docs/source/_toctree.yml
@@ -95,6 +95,18 @@
title: "TFLite"
title: Exporters
isExpanded: false
- sections:
- local: bettertransformer/overview
title: Overview
- sections:
- local: bettertransformer/tutorials/convert
title: Convert Transformers models to use BetterTransformer
- local: bettertransformer/tutorials/contribute
title: How to add support for new architectures?
title: Tutorials
isExpanded: false
title: BetterTransformer
isExpanded: false
- sections:
- local: torch_fx/overview
title: Overview
@@ -115,18 +127,6 @@
isExpanded: false
title: Torch FX
isExpanded: false
- sections:
- local: bettertransformer/overview
title: Overview
- sections:
- local: bettertransformer/tutorials/convert
title: Convert Transformers models to use BetterTransformer
- local: bettertransformer/tutorials/contribute
title: How to add support for new architectures?
title: Tutorials
isExpanded: false
title: BetterTransformer
isExpanded: false
- sections:
- local: llm_quantization/usage_guides/quantization
title: GPTQ quantization
43 changes: 33 additions & 10 deletions docs/source/index.mdx
@@ -19,11 +19,20 @@ As such, Optimum enables developers to efficiently use any of these platforms wi

🤗 Optimum is distributed as a collection of packages - check out the links below for an in-depth look at each one.


## Hardware partners

The packages below enable you to get the best of the 🤗 Hugging Face ecosystem on various types of devices.

<div class="mt-10">
<div class="w-full flex flex-col space-y-4 md:space-y-0 md:grid md:grid-cols-3 md:gap-y-4 md:gap-x-5">
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./habana/index"
><div class="w-full text-center bg-gradient-to-br from-indigo-400 to-indigo-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Habana</div>
<p class="text-gray-700">Maximize training throughput and efficiency with <span class="underline" onclick="event.preventDefault(); window.open('https://docs.habana.ai/en/latest/Gaudi_Overview/Gaudi_Architecture.html', '_blank');">Habana's Gaudi processor</span></p>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="https://github.com/huggingface/optimum-nvidia"
><div class="w-full text-center bg-gradient-to-br from-green-600 to-green-600 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">NVIDIA</div>
<p class="text-gray-700">Accelerate inference with NVIDIA TensorRT-LLM on the <span class="underline" onclick="event.preventDefault(); window.open('https://developer.nvidia.com/blog/nvidia-tensorrt-llm-supercharges-large-language-model-inference-on-nvidia-h100-gpus/', '_blank');">NVIDIA platform</span></p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./amd/index"
><div class="w-full text-center bg-gradient-to-br from-red-600 to-red-600 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">AMD</div>
<p class="text-gray-700">Enable performance optimizations for <span class="underline" onclick="event.preventDefault(); window.open('https://www.amd.com/en/graphics/instinct-server-accelerators', '_blank');">AMD Instinct GPUs</span> and <span class="underline" onclick="event.preventDefault(); window.open('https://ryzenai.docs.amd.com/en/latest/index.html', '_blank');">AMD Ryzen AI NPUs</span></p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./intel/index"
><div class="w-full text-center bg-gradient-to-br from-blue-400 to-blue-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Intel</div>
@@ -33,25 +42,39 @@ As such, Optimum enables developers to efficiently use any of these platforms wi
><div class="w-full text-center bg-gradient-to-br from-orange-400 to-orange-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">AWS Trainium/Inferentia</div>
<p class="text-gray-700">Accelerate your training and inference workflows with <span class="underline" onclick="event.preventDefault(); window.open('https://aws.amazon.com/machine-learning/trainium/', '_blank');">AWS Trainium</span> and <span class="underline" onclick="event.preventDefault(); window.open('https://aws.amazon.com/machine-learning/inferentia/', '_blank');">AWS Inferentia</span></p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="https://github.com/huggingface/optimum-nvidia"
><div class="w-full text-center bg-gradient-to-br from-green-600 to-green-600 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">NVIDIA</div>
<p class="text-gray-700">Accelerate inference with NVIDIA TensorRT-LLM on the <span class="underline" onclick="event.preventDefault(); window.open('https://developer.nvidia.com/blog/nvidia-tensorrt-llm-supercharges-large-language-model-inference-on-nvidia-h100-gpus/', '_blank');">NVIDIA platform</span></p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./amd/index"
><div class="w-full text-center bg-gradient-to-br from-red-600 to-red-600 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">AMD</div>
<p class="text-gray-700">Enable performance optimizations for <span class="underline" onclick="event.preventDefault(); window.open('https://www.amd.com/en/graphics/instinct-server-accelerators', '_blank');">AMD Instinct GPUs</span> and <span class="underline" onclick="event.preventDefault(); window.open('https://ryzenai.docs.amd.com/en/latest/index.html', '_blank');">AMD Ryzen AI NPUs</span></p>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./habana/index"
><div class="w-full text-center bg-gradient-to-br from-indigo-400 to-indigo-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Habana</div>
<p class="text-gray-700">Maximize training throughput and efficiency with <span class="underline" onclick="event.preventDefault(); window.open('https://docs.habana.ai/en/latest/Gaudi_Overview/Gaudi_Architecture.html', '_blank');">Habana's Gaudi processor</span></p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./furiosa/index"
><div class="w-full text-center bg-gradient-to-br from-green-400 to-green-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">FuriosaAI</div>
<p class="text-gray-700">Fast and efficient inference on <span class="underline" onclick="event.preventDefault(); window.open('https://www.furiosa.ai/', '_blank');">FuriosaAI WARBOY</span></p>
</a>
</div>
</div>


## Open-source integrations
Collaborator: Not sure about "Open-source", as it's also true for the previous section; I would go with something like "hardware-agnostic".

Contributor (author): I meant that, for instance, Furiosa's or Habana's SDKs are not open-source, yet they are integrated into Optimum, unlike ORT, FX, etc. The idea I would like to convey here is that HPPs are not the only thing we do, and that we also work on integrating other popular libraries into Optimum. Maybe there is a better way to say it?

🤗 Optimum also supports a variety of open-source frameworks to make model optimization very easy.

<div class="mt-10">
<div class="w-full flex flex-col space-y-4 md:space-y-0 md:grid md:grid-cols-3 md:gap-y-4 md:gap-x-5">
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./onnxruntime/overview"
><div class="w-full text-center bg-gradient-to-br from-pink-400 to-pink-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">ONNX Runtime</div>
<p class="text-gray-700">Apply quantization and graph optimization to accelerate Transformers models training and inference with <span class="underline" onclick="event.preventDefault(); window.open('https://onnxruntime.ai/', '_blank');">ONNX Runtime</span></p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./exporters/overview"
><div class="w-full text-center bg-gradient-to-br from-purple-400 to-purple-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Exporters</div>
<p class="text-gray-700">Export your PyTorch or TensorFlow model to different formats such as ONNX and TFLite</p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./bettertransformer/overview"
><div class="w-full text-center bg-gradient-to-br from-yellow-400 to-yellow-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">BetterTransformer</div>
<p class="text-gray-700">A one-liner integration to use <span class="underline" onclick="event.preventDefault(); window.open('https://pytorch.org/blog/a-better-transformer-for-fast-transformer-encoder-inference/', '_blank');">PyTorch's BetterTransformer</span> with Transformers models</p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./torch_fx/overview"
><div class="w-full text-center bg-gradient-to-br from-green-400 to-green-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Torch FX</div>
<p class="text-gray-700">Create and compose custom graph transformations to optimize PyTorch Transformers models with <span class="underline" onclick="event.preventDefault(); window.open('https://pytorch.org/docs/stable/fx.html#', '_blank');">Torch FX</span></p>
</a>
</div>
</div>
Collaborator: Not related to this PR, but the whole color layout is burning my eyes; I will open a new PR to propose something (not that it'll be any better lol).