diff --git a/.github/workflows/build_main_documentation.yml b/.github/workflows/build_main_documentation.yml
index 198b81005b9..3f8c230a98b 100644
--- a/.github/workflows/build_main_documentation.yml
+++ b/.github/workflows/build_main_documentation.yml
@@ -171,7 +171,7 @@ jobs:
       - name: Combine subpackage documentation
         run: |
           cd optimum
-          sudo python docs/combine_docs.py --subpackages habana intel neuron furiosa amd --version ${{ env.VERSION }}
+          sudo python docs/combine_docs.py --subpackages nvidia amd intel neuron habana furiosa --version ${{ env.VERSION }}
           cd ..

       - name: Push to repositories
diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml
index 016852f4799..01d4c4e7a41 100644
--- a/.github/workflows/build_pr_documentation.yml
+++ b/.github/workflows/build_pr_documentation.yml
@@ -48,6 +48,11 @@ jobs:
           repository: 'huggingface/optimum-furiosa'
           path: optimum-furiosa

+      - uses: actions/checkout@v2
+        with:
+          repository: 'huggingface/optimum-amd'
+          path: optimum-amd
+
       - name: Setup environment
         run: |
           pip uninstall -y doc-builder
@@ -78,6 +83,14 @@ jobs:
         run: |
           echo "For PRs we don't build Furiosa doc"

+      - name: Make AMD documentation
+        run: |
+          sudo docker system prune -a -f
+          cd optimum-amd
+          make doc BUILD_DIR=amd-doc-build VERSION=pr_$PR_NUMBER
+          sudo mv amd-doc-build ../optimum
+          cd ..
+
       - name: Make Optimum documentation
         run: |
           sudo docker system prune -a -f
@@ -88,7 +101,7 @@
       - name: Combine subpackage documentation
         run: |
           cd optimum
-          sudo python docs/combine_docs.py --subpackages habana intel neuron furiosa --version pr_$PR_NUMBER
+          sudo python docs/combine_docs.py --subpackages nvidia amd intel neuron habana furiosa --version pr_$PR_NUMBER
           sudo mv optimum-doc-build ../
           cd ..
diff --git a/README.md b/README.md
index d42e9454277..4907ca62839 100644
--- a/README.md
+++ b/README.md
@@ -39,7 +39,7 @@ python -m pip install optimum[onnxruntime]@git+https://github.com/huggingface/op

 ## Accelerated Inference

-🤗 Optimum provides multiple tools to export and run optimized models on various ecosystems:
+🤗 Optimum provides multiple tools to export and run optimized models on various ecosystems:

 - [ONNX](https://huggingface.co/docs/optimum/exporters/onnx/usage_guides/export_a_model) / [ONNX Runtime](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/models)
 - TensorFlow Lite
diff --git a/docs/combine_docs.py b/docs/combine_docs.py
index 8c2237ed4c0..17d99a3a021 100755
--- a/docs/combine_docs.py
+++ b/docs/combine_docs.py
@@ -6,6 +6,9 @@
 import yaml


+SUBPACKAGE_TOC_INSERT_INDEX = 2
+
+
 parser = argparse.ArgumentParser(
     description="Script to combine doc builds from subpackages with base doc build of Optimum. "
     "Assumes all subpackage doc builds are present in the root of the `optimum` repo."
@@ -89,7 +92,7 @@ def add_neuron_doc(base_toc: List):
     """
     # Update optimum table of contents
     base_toc.insert(
-        1,
+        SUBPACKAGE_TOC_INSERT_INDEX,
         {
             "sections": [
                 {
@@ -118,6 +121,10 @@ def main():
        if subpackage == "neuron":
            # Neuron has its own doc so it is managed differently
            add_neuron_doc(base_toc)
+        elif subpackage == "nvidia":
+            # At the moment, Optimum Nvidia's doc is the README of the GitHub repo
+            # It is linked to in optimum/docs/source/nvidia_overview.mdx
+            continue
        else:
            subpackage_path = Path(f"{subpackage}-doc-build")

@@ -140,10 +147,13 @@ def main():
            # Extend table of contents sections with the subpackage name as the parent folder
            rename_subpackage_toc(subpackage, subpackage_toc)
            # Just keep the name of the partner in the TOC title
-            subpackage_toc[0]["title"] = subpackage_toc[0]["title"].split("Optimum ")[-1]
+            if subpackage == "amd":
+                subpackage_toc[0]["title"] = subpackage_toc[0]["title"].split("Optimum-")[-1]
+            else:
+                subpackage_toc[0]["title"] = subpackage_toc[0]["title"].split("Optimum ")[-1]
        if subpackage != "graphcore":
            # Update optimum table of contents
-            base_toc.insert(1, subpackage_toc[0])
+            base_toc.insert(SUBPACKAGE_TOC_INSERT_INDEX, subpackage_toc[0])

    # Write final table of contents
    with open(base_toc_path, "w") as f:
diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
index 4d3f4e11dbd..8444da1b9a9 100644
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -12,6 +12,11 @@
       title: Quantization
     title: Conceptual guides
   title: Overview
+- sections:
+  - local: nvidia_overview
+    title: 🤗 Optimum Nvidia
+  title: Nvidia
+  isExpanded: false
 - sections:
   - local: onnxruntime/overview
     title: Overview
@@ -95,6 +100,18 @@
       title: "TFLite"
   title: Exporters
   isExpanded: false
+- sections:
+  - local: bettertransformer/overview
+    title: Overview
+  - sections:
+    - local: bettertransformer/tutorials/convert
+      title: Convert Transformers models to use BetterTransformer
+    - local: bettertransformer/tutorials/contribute
+      title: How to add support for new architectures?
+    title: Tutorials
+    isExpanded: false
+  title: BetterTransformer
+  isExpanded: false
 - sections:
   - local: torch_fx/overview
     title: Overview
@@ -115,18 +132,6 @@
     isExpanded: false
   title: Torch FX
   isExpanded: false
-- sections:
-  - local: bettertransformer/overview
-    title: Overview
-  - sections:
-    - local: bettertransformer/tutorials/convert
-      title: Convert Transformers models to use BetterTransformer
-    - local: bettertransformer/tutorials/contribute
-      title: How to add support for new architectures?
-    title: Tutorials
-    isExpanded: false
-  title: BetterTransformer
-  isExpanded: false
 - sections:
   - local: llm_quantization/usage_guides/quantization
     title: GPTQ quantization
diff --git a/docs/source/index.mdx b/docs/source/index.mdx
index b8d0d44aae4..4e61e960d58 100644
--- a/docs/source/index.mdx
+++ b/docs/source/index.mdx
@@ -19,11 +19,20 @@ As such, Optimum enables developers to efficiently use any of these platforms wi
 🤗 Optimum is distributed as a collection of packages - check out the links below for an in-depth look at each one.

+
+## Hardware partners
+
+The packages below enable you to get the best of the 🤗 Hugging Face ecosystem on various types of devices.
+
-  Habana
-  Maximize training throughput and efficiency with Habana's Gaudi processor
+  NVIDIA
+  Accelerate inference with NVIDIA TensorRT-LLM on the NVIDIA platform
+  AMD
+  Enable performance optimizations for AMD Instinct GPUs and AMD Ryzen AI NPUs
   Intel
@@ -33,25 +42,42 @@ As such, Optimum enables developers to efficiently use any of these platforms wi
   AWS Trainium/Inferentia
   Accelerate your training and inference workflows with AWS Trainium and AWS Inferentia
-  NVIDIA
-  Accelerate inference with NVIDIA TensorRT-LLM on the NVIDIA platform
-  AMD
-  Enable performance optimizations for AMD Instinct GPUs and AMD Ryzen AI NPUs
+  Habana
+  Maximize training throughput and efficiency with Habana's Gaudi processor
   FuriosaAI
   Fast and efficient inference on FuriosaAI WARBOY
+
+> [!TIP]
+> Some packages provide hardware-agnostic features (e.g. INC interface in Optimum Intel).
+
+
+## Open-source integrations
+
+🤗 Optimum also supports a variety of open-source frameworks to make model optimization very easy.
+
   ONNX Runtime
   Apply quantization and graph optimization to accelerate Transformers models training and inference with ONNX Runtime
   Exporters
   Export your PyTorch or TensorFlow model to different formats such as ONNX and TFLite
   BetterTransformer
   A one-liner integration to use PyTorch's BetterTransformer with Transformers models
   Torch FX
   Create and compose custom graph transformations to optimize PyTorch Transformers models with Torch FX
diff --git a/docs/source/nvidia_overview.mdx b/docs/source/nvidia_overview.mdx
new file mode 100644
index 00000000000..f78583c27d5
--- /dev/null
+++ b/docs/source/nvidia_overview.mdx
@@ -0,0 +1,3 @@
+# 🤗 Optimum Nvidia
+
+Find more information about 🤗 Optimum Nvidia [here](https://github.com/huggingface/optimum-nvidia).
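To sanity-check the new `combine_docs.py` behaviour outside of CI, here is a minimal, self-contained sketch (not part of this PR) of what the `SUBPACKAGE_TOC_INSERT_INDEX` constant and the AMD-specific title split do. The sample TOC entries and titles below are invented for illustration; only the constant value and the two `split` strings come from the diff above.

```python
# Minimal sketch (not part of the PR) of the new TOC handling in docs/combine_docs.py.
# The base_toc / subpackage_toc contents are hypothetical examples.
SUBPACKAGE_TOC_INSERT_INDEX = 2  # value taken from the diff above


def partner_title(subpackage: str, toc_title: str) -> str:
    # Optimum-AMD titles its docs "Optimum-AMD" (hyphenated), while the other partners
    # use "Optimum <Partner>", hence the dedicated split string for "amd".
    if subpackage == "amd":
        return toc_title.split("Optimum-")[-1]
    return toc_title.split("Optimum ")[-1]


# Hypothetical base TOC: the first two entries stand in for the hand-written sections
# in docs/source/_toctree.yml (including the new Nvidia entry).
base_toc = [{"title": "Overview"}, {"title": "Nvidia"}, {"title": "ONNX Runtime"}]
subpackage_toc = [{"title": "🤗 Optimum Habana", "sections": []}]

subpackage_toc[0]["title"] = partner_title("habana", subpackage_toc[0]["title"])
# Inserting at index 2 places the generated partner section after "Overview" and "Nvidia".
base_toc.insert(SUBPACKAGE_TOC_INSERT_INDEX, subpackage_toc[0])

print([entry["title"] for entry in base_toc])   # ['Overview', 'Nvidia', 'Habana', 'ONNX Runtime']
print(partner_title("amd", "Optimum-AMD"))      # 'AMD'
```

Naming the insert index presumably keeps the hand-written Nvidia entry ahead of the generated partner sections, and the hyphen split covers Optimum-AMD's "Optimum-AMD" TOC title, which the plain "Optimum " split would leave untouched.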