Merge remote-tracking branch 'upstream/main' into longjie/add_automatic_model_parallel_via_fx
zhenglongjiepheonix committed Jul 8, 2024
2 parents 0876f5d + 171020c commit 87e66fb
Showing 25 changed files with 397 additions and 134 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_onnx.yml
@@ -27,7 +27,7 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        pip install .[tests,onnxruntime,exporters-tf]
+        pip install .[tests,exporters]
     - name: Test with unittest
       working-directory: tests
       run: |
7 changes: 3 additions & 4 deletions README.md
@@ -79,8 +79,7 @@ It is possible to export 🤗 Transformers and Diffusers models to the OpenVINO
optimum-cli export openvino --model distilbert-base-uncased-finetuned-sst-2-english distilbert_sst2_ov
```

-If you add `--weight-format int8`, the weights will be quantized to `int8`, check out our [documentation](https://huggingface.co/docs/optimum/main/en/intel/optimization_ov#weight-only-quantization) for more detail on weight only quantization. To apply quantization on both weights and activations, you can find more information [here](https://huggingface.co/docs/optimum/main/en/intel/optimization_ov#static-quantization).
+If you add `--weight-format int8`, the weights will be quantized to `int8`, check out our [documentation](https://huggingface.co/docs/optimum/main/intel/openvino/export) for more detail. To apply quantization on both weights and activations, you can find more information [here](https://huggingface.co/docs/optimum/main/intel/openvino/optimization#static-quantization).

To load a model and run inference with OpenVINO Runtime, you can just replace your `AutoModelForXxx` class with the corresponding `OVModelForXxx` class. To load a PyTorch checkpoint and convert it to the OpenVINO format on-the-fly, you can set `export=True` when loading your model.

@@ -92,13 +91,13 @@
model_id = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(model_id)
- model = AutoModelForSequenceClassification.from_pretrained(model_id)
+ model = OVModelForSequenceClassification.from_pretrained("distilbert_sst2_ov")
+ model = OVModelForSequenceClassification.from_pretrained(model_id, export=True)

classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
results = classifier("He's a dreadful magician.")
```

-You can find more examples in the [documentation](https://huggingface.co/docs/optimum/intel/inference) and in the [examples](https://github.com/huggingface/optimum-intel/tree/main/examples/openvino).
+You can find more examples in the [documentation](https://huggingface.co/docs/optimum/main/intel/openvino/inference) and in the [examples](https://github.com/huggingface/optimum-intel/tree/main/examples/openvino).
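
As a Python-side counterpart to the `--weight-format int8` CLI flag mentioned above, here is a minimal sketch of weight-only quantization at load time; the `load_in_8bit` flag and the save directory name are assumptions for illustration, not part of this diff:

```python
from transformers import AutoTokenizer, pipeline

from optimum.intel import OVModelForSequenceClassification

model_id = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# export=True converts the PyTorch checkpoint to OpenVINO on the fly;
# load_in_8bit=True (assumed available here) quantizes the weights to int8
model = OVModelForSequenceClassification.from_pretrained(model_id, export=True, load_in_8bit=True)
model.save_pretrained("distilbert_sst2_ov_int8")

classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(classifier("He's a dreadful magician."))
```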

### Neural Compressor

19 changes: 11 additions & 8 deletions examples/onnxruntime/optimization/multiple-choice/run_swag.py
@@ -37,7 +37,7 @@

from optimum.onnxruntime import ORTModelForMultipleChoice, ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig
-from optimum.onnxruntime.model import ORTModel
+from optimum.onnxruntime.utils import evaluation_loop


# Will error if the minimal version of Transformers is not installed. The version of transformers must be >= 4.19.0
@@ -236,7 +236,6 @@ def main():
)

os.makedirs(training_args.output_dir, exist_ok=True)
-optimized_model_path = os.path.join(training_args.output_dir, "model_optimized.onnx")

tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name or model_args.model_name_or_path)

@@ -254,13 +253,18 @@ def main():
optimizer = ORTOptimizer.from_pretrained(model)

# Optimize the model
-optimizer.optimize(
+optimized_model_path = optimizer.optimize(
     optimization_config=optimization_config,
     save_dir=training_args.output_dir,
     use_external_data_format=onnx_export_args.use_external_data_format,
     one_external_file=onnx_export_args.one_external_file,
 )

+model = ORTModelForMultipleChoice.from_pretrained(
+    optimized_model_path,
+    provider=optim_args.execution_provider,
+)

if training_args.do_eval:
# Prepare the dataset downloading, preprocessing and metric creation to perform the evaluation and / or the
# prediction step(s)
@@ -339,13 +343,12 @@ def compute_metrics(eval_predictions):
# Evaluation
logger.info("*** Evaluate ***")

-ort_model = ORTModel(
-    optimized_model_path,
-    execution_provider=optim_args.execution_provider,
-    compute_metrics=compute_metrics,
+outputs = evaluation_loop(
+    model=model,
+    dataset=eval_dataset,
+    label_names=["label"],
+    compute_metrics=compute_metrics,
 )
-outputs = ort_model.evaluation_loop(eval_dataset)

# Save evaluation metrics
with open(os.path.join(training_args.output_dir, "eval_results.json"), "w") as f:
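The change above is the pattern repeated across the remaining example scripts: `optimize()` now returns the output directory, the optimized model is reloaded through its `ORTModelForXxx` class, and the removed `optimum.onnxruntime.model.ORTModel` wrapper gives way to the `evaluation_loop` helper. A condensed sketch of the new flow, assuming `evaluation_loop` accepts the keyword arguments shown in this diff (the checkpoint name, save directory, `eval_dataset`, and `compute_metrics` are placeholders):

```python
from optimum.onnxruntime import ORTModelForMultipleChoice, ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig
from optimum.onnxruntime.utils import evaluation_loop

# Export a fine-tuned checkpoint to ONNX (placeholder model name)
model = ORTModelForMultipleChoice.from_pretrained("my-swag-checkpoint", export=True)

# optimize() returns the directory containing the optimized model,
# replacing the previously hard-coded model_optimized.onnx path
optimizer = ORTOptimizer.from_pretrained(model)
optimized_model_path = optimizer.optimize(
    optimization_config=OptimizationConfig(optimization_level=1),
    save_dir="swag_optimized",
)

# Reload the optimized model on the desired execution provider
model = ORTModelForMultipleChoice.from_pretrained(
    optimized_model_path, provider="CPUExecutionProvider"
)

# eval_dataset and compute_metrics are prepared as in the script above
outputs = evaluation_loop(
    model=model,
    dataset=eval_dataset,
    label_names=["label"],
    compute_metrics=compute_metrics,
)
print(outputs.metrics)
```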
24 changes: 12 additions & 12 deletions examples/onnxruntime/optimization/question-answering/run_qa.py
@@ -37,7 +37,7 @@

from optimum.onnxruntime import ORTModelForQuestionAnswering, ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig
-from optimum.onnxruntime.model import ORTModel
+from optimum.onnxruntime.utils import evaluation_loop


# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
@@ -305,7 +305,6 @@ def main():
)

os.makedirs(training_args.output_dir, exist_ok=True)
-optimized_model_path = os.path.join(training_args.output_dir, "model_optimized.onnx")

tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name or model_args.model_name_or_path)

@@ -323,13 +322,15 @@ def main():
optimizer = ORTOptimizer.from_pretrained(model)

# Optimize the model
-optimizer.optimize(
+optimized_model_path = optimizer.optimize(
     optimization_config=optimization_config,
     save_dir=training_args.output_dir,
     use_external_data_format=onnx_export_args.use_external_data_format,
     one_external_file=onnx_export_args.one_external_file,
 )

+model = ORTModelForQuestionAnswering.from_pretrained(optimized_model_path, provider=optim_args.execution_provider)

# Prepare the dataset downloading, preprocessing and metric creation to perform the evaluation and / or the
# prediction step(s)
if training_args.do_eval or training_args.do_predict:
@@ -478,13 +479,12 @@ def compute_metrics(p: EvalPrediction):
# During Feature creation dataset samples might increase, we will select required samples again
eval_dataset = eval_dataset.select(range(data_args.max_eval_samples))

-ort_model = ORTModel(
-    optimized_model_path,
-    execution_provider=optim_args.execution_provider,
-    compute_metrics=compute_metrics,
+outputs = evaluation_loop(
+    model=model,
+    dataset=eval_dataset,
+    label_names=["start_positions", "end_positions"],
+    compute_metrics=compute_metrics,
 )
-outputs = ort_model.evaluation_loop(eval_dataset)
predictions = post_processing_function(eval_examples, eval_dataset, outputs.predictions)
metrics = compute_metrics(predictions)

@@ -514,12 +514,12 @@ def compute_metrics(p: EvalPrediction):
# During Feature creation dataset samples might increase, we will select required samples again
predict_dataset = predict_dataset.select(range(data_args.max_predict_samples))

-ort_model = ORTModel(
-    optimized_model_path,
-    execution_provider=optim_args.execution_provider,
+outputs = evaluation_loop(
+    model=model,
+    dataset=predict_dataset,
+    label_names=["start_positions", "end_positions"],
+    compute_metrics=compute_metrics,
 )
-outputs = ort_model.evaluation_loop(predict_dataset)
predictions = post_processing_function(predict_examples, predict_dataset, outputs.predictions)
metrics = compute_metrics(predictions)

@@ -14,13 +14,11 @@ See the License for the specific language governing permissions and
limitations under the License.
-->

# Text classification

## GLUE tasks

-The script [`run_glue.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/text-classification/run_glue.py)
-allows us to apply graph optimizations and fusion using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for sequence classification tasks such as
-the ones from the [GLUE benchmark](https://gluebenchmark.com/).
+The script [`run_glue.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/text-classification/run_glue.py) allows us to apply graph optimizations and fusion using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for sequence classification tasks such as the ones from the [GLUE benchmark](https://gluebenchmark.com/).

The following example applies graph optimization on a DistilBERT model fine-tuned on the SST-2 task. Here the optimization level is set to 1, enabling basic optimizations such as redundant node elimination and constant folding. A higher optimization level will result in a hardware-dependent optimized graph.

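The example command itself is collapsed in this view; as a minimal Python sketch of level-1 optimization (the model name and output directory are illustrative, the API calls follow the scripts in this commit):

```python
from optimum.onnxruntime import ORTModelForSequenceClassification, ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig

# Export the fine-tuned model to ONNX
model = ORTModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english", export=True
)
optimizer = ORTOptimizer.from_pretrained(model)

# Level 1 applies hardware-independent optimizations such as
# constant folding and redundant node elimination
optimizer.optimize(
    optimization_config=OptimizationConfig(optimization_level=1),
    save_dir="distilbert_sst2_optimized",
)
```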
25 changes: 15 additions & 10 deletions examples/onnxruntime/optimization/text-classification/run_glue.py
@@ -42,7 +42,7 @@

from optimum.onnxruntime import ORTModelForSequenceClassification, ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig
-from optimum.onnxruntime.model import ORTModel
+from optimum.onnxruntime.utils import evaluation_loop


# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
@@ -250,7 +250,6 @@ def main():
)

os.makedirs(training_args.output_dir, exist_ok=True)
-optimized_model_path = os.path.join(training_args.output_dir, "model_optimized.onnx")

tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path)

@@ -268,13 +267,17 @@ def main():
optimizer = ORTOptimizer.from_pretrained(model)

# Optimize the model
-optimizer.optimize(
+optimized_model_path = optimizer.optimize(
     optimization_config=optimization_config,
     save_dir=training_args.output_dir,
     use_external_data_format=onnx_export_args.use_external_data_format,
     one_external_file=onnx_export_args.one_external_file,
 )

+model = ORTModelForSequenceClassification.from_pretrained(
+    optimized_model_path, provider=optim_args.execution_provider
+)

# Prepare the dataset downloading, preprocessing and metric creation to perform the evaluation and / or the
# prediction step(s)
if training_args.do_eval or training_args.do_predict:
@@ -408,13 +411,13 @@ def compute_metrics(p: EvalPrediction):
desc="Running tokenizer on the evaluation dataset",
)

-ort_model = ORTModel(
-    optimized_model_path,
-    execution_provider=optim_args.execution_provider,
+outputs = evaluation_loop(
+    model=model,
+    dataset=eval_dataset,
+    compute_metrics=compute_metrics,
+    label_names=["label"],
 )
-outputs = ort_model.evaluation_loop(eval_dataset)

# Save metrics
with open(os.path.join(training_args.output_dir, "eval_results.json"), "w") as f:
json.dump(outputs.metrics, f, indent=4, sort_keys=True)
@@ -436,10 +439,12 @@ def compute_metrics(p: EvalPrediction):
desc="Running tokenizer on the test dataset",
)

-ort_model = ORTModel(
-    optimized_model_path, execution_provider=optim_args.execution_provider, label_names=["label"]
+outputs = evaluation_loop(
+    model=model,
+    dataset=predict_dataset,
+    compute_metrics=compute_metrics,
+    label_names=["label"],
 )
-outputs = ort_model.evaluation_loop(predict_dataset)
predictions = np.squeeze(outputs.predictions) if is_regression else np.argmax(outputs.predictions, axis=1)

# Save predictions
17 changes: 7 additions & 10 deletions examples/onnxruntime/optimization/token-classification/run_ner.py
@@ -38,7 +38,7 @@

from optimum.onnxruntime import ORTModelForTokenClassification, ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig
-from optimum.onnxruntime.model import ORTModel
+from optimum.onnxruntime.utils import evaluation_loop


# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
@@ -276,7 +276,6 @@ def main():
)

os.makedirs(training_args.output_dir, exist_ok=True)
-optimized_model_path = os.path.join(training_args.output_dir, "model_optimized.onnx")

tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name or model_args.model_name_or_path)

Expand Down Expand Up @@ -480,12 +479,11 @@ def compute_metrics(p):
desc="Running tokenizer on the validation dataset",
)

-ort_model = ORTModel(
-    optimized_model_path,
-    execution_provider=optim_args.execution_provider,
+outputs = evaluation_loop(
+    model=model,
+    dataset=eval_dataset,
+    compute_metrics=compute_metrics,
 )
-outputs = ort_model.evaluation_loop(eval_dataset)

# Save evaluation metrics
with open(os.path.join(training_args.output_dir, "eval_results.json"), "w") as f:
@@ -509,12 +507,11 @@ def compute_metrics(p):
desc="Running tokenizer on the prediction dataset",
)

-ort_model = ORTModel(
-    optimized_model_path,
-    execution_provider=optim_args.execution_provider,
+outputs = evaluation_loop(
+    model=model,
+    dataset=predict_dataset,
+    compute_metrics=compute_metrics,
 )
-outputs = ort_model.evaluation_loop(predict_dataset)
predictions = np.argmax(outputs.predictions, axis=2)

# Remove ignored index (special tokens)
@@ -22,7 +22,6 @@
import sys
from dataclasses import dataclass, field
from functools import partial
-from pathlib import Path
from typing import Optional

import datasets
@@ -38,7 +37,6 @@

from optimum.onnxruntime import ORTQuantizer
from optimum.onnxruntime.configuration import AutoCalibrationConfig, QuantizationConfig
-from optimum.onnxruntime.model import ORTModel
from optimum.onnxruntime.modeling_ort import ORTModelForImageClassification
from optimum.onnxruntime.preprocessors import QuantizationPreprocessor
from optimum.onnxruntime.preprocessors.passes import (
@@ -47,6 +45,7 @@
ExcludeNodeAfter,
ExcludeNodeFollowedBy,
)
+from optimum.onnxruntime.utils import evaluation_loop


logger = logging.getLogger(__name__)
@@ -378,13 +377,16 @@ def compute_metrics(p: EvalPrediction):
quantization_preprocessor.register_pass(ExcludeNodeFollowedBy("Add", "Softmax"))

# Apply quantization on the model
-quantizer.quantize(
+quantized_model_path = quantizer.quantize(
     save_dir=training_args.output_dir,
     calibration_tensors_range=ranges,
     quantization_config=qconfig,
     preprocessor=quantization_preprocessor,
     use_external_data_format=onnx_export_args.use_external_data_format,
 )
+model = ORTModelForImageClassification.from_pretrained(
+    quantized_model_path, provider=optim_args.execution_provider
+)

# Evaluation
if training_args.do_eval:
@@ -409,13 +411,12 @@ def compute_metrics(p: EvalPrediction):
# Set the validation transforms
eval_dataset = eval_dataset.with_transform(preprocess_function)

-ort_model = ORTModel(
-    Path(training_args.output_dir) / "model_quantized.onnx",
-    execution_provider=optim_args.execution_provider,
-    compute_metrics=compute_metrics,
+outputs = evaluation_loop(
+    model=model,
+    dataset=eval_dataset,
+    label_names=[labels_column],
+    compute_metrics=compute_metrics,
 )
-outputs = ort_model.evaluation_loop(eval_dataset)
# Save metrics
with open(os.path.join(training_args.output_dir, "eval_results.json"), "w") as f:
json.dump(outputs.metrics, f, indent=4, sort_keys=True)
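The quantization script follows the same pattern as the optimization ones: `quantize()` now returns the save directory, which is passed straight back to `from_pretrained`. A condensed sketch using a dynamic-quantization config for brevity (the script above instead computes `calibration_tensors_range` and a `QuantizationPreprocessor` for static quantization; the model name and save directory here are illustrative):

```python
from optimum.onnxruntime import ORTModelForImageClassification, ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig

# Export an image-classification checkpoint to ONNX (illustrative model name)
model = ORTModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224", export=True
)
quantizer = ORTQuantizer.from_pretrained(model)

# Dynamic quantization needs no calibration dataset
qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)
quantized_model_path = quantizer.quantize(
    save_dir="vit_quantized",
    quantization_config=qconfig,
)

# quantize() returns the directory containing the quantized model
model = ORTModelForImageClassification.from_pretrained(quantized_model_path)
```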
