diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..1735498 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,131 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or advances of + any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email address, + without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +CommunityCodeOfConduct AT intel DOT com. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. 
+ +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder][Mozilla CoC]. + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. + +[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html +[Mozilla CoC]: https://github.com/mozilla/diversity +[FAQ]: https://www.contributor-covenant.org/faq \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..578d28d --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,57 @@ +# Contributing + +### License + + is licensed under the terms in [LICENSE]. By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms. + +### Sign your work + +Please use the sign-off line at the end of the patch. Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open-source patch. The rules are pretty simple: if you can certify +the below (from [developercertificate.org](http://developercertificate.org/)): + +``` +Developer Certificate of Origin +Version 1.1 + +Copyright (C) 2004, 2006 The Linux Foundation and its contributors. +660 York Street, Suite 102, +San Francisco, CA 94110 USA + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. 
+ +Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. +``` + +Then you just add a line to every git commit message: + + Signed-off-by: Joe Smith + +Use your real name (sorry, no pseudonyms or anonymous contributions.) + +If you set your `user.name` and `user.email` git configs, you can sign your +commit automatically with `git commit -s`. \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..aa57c80 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# E2E DLSA +DLSA is Intel optimized representative End-to-end Fine-Tuning & Inference pipeline for Document level sentiment analysis using BERT model implemented with Hugging face transformer API. + diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..5af6cb6 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,13 @@ +# Security Policy + +## Report a Vulnerability + +Please report security issues or vulnerabilities to the [Intel® Security Center]. + +For more information on how Intel® works to resolve security issues, see +[Vulnerability Handling Guidelines]. 
+
+[Intel® Security Center]:https://www.intel.com/content/www/us/en/security-center/default.html
+
+[Vulnerability Handling Guidelines]:https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html
+
diff --git a/docs/_config.yml b/docs/_config.yml
new file mode 100644
index 0000000..c419263
--- /dev/null
+++ b/docs/_config.yml
@@ -0,0 +1 @@
+theme: jekyll-theme-cayman
\ No newline at end of file
diff --git a/docs/assets/images/DLSA_workflow.PNG b/docs/assets/images/DLSA_workflow.PNG
new file mode 100644
index 0000000..c504a56
Binary files /dev/null and b/docs/assets/images/DLSA_workflow.PNG differ
diff --git a/docs/fine-tuning/multi-nodes-ipex.md b/docs/fine-tuning/multi-nodes-ipex.md
new file mode 100644
index 0000000..a51558e
--- /dev/null
+++ b/docs/fine-tuning/multi-nodes-ipex.md
@@ -0,0 +1,50 @@
+# How to Run DLSA Multi-Instance Fine-Tuning with IPEX (FP32, BF16)
+
+## Install the MPI library
+
+Install MPI from [here](https://anaconda.org/intel/impi_rt).
+
+Intel MPI is also included in the Intel oneAPI toolkit; installing it through a package manager is recommended.
+
+> Note: This step should be performed on all worker nodes.
+
+## To run:
+
+```
+source /opt/intel/oneapi/mpi/latest/env/vars.sh
+cd profiling-transformers
+```
+
+> Note:
+>
+> np: the total number of processes to launch across the cluster
+>
+> ppn: processes per node, i.e. how many processes run on each worker node
+>
+> For example, to run on 2 nodes with 1 process per node, use `-np 2 -ppn 1`;
+>
+> to run on 4 nodes with 2 processes per node, use `-np 8 -ppn 2`.
+
+### Running a single process on a single node
+
+```
+bash fine-tuning/run_dist.sh -np 1 -ppn 1 bash fine-tuning/run_ipex_native.sh
+```
+
+### Running multiple instances on a single node
+
+```
+# Run 2 instances on a single node
+bash fine-tuning/run_dist.sh -np 2 -ppn 2 bash fine-tuning/run_ipex_native.sh
+```
+
+### Running with IPEX BF16
+
+> Before you run BF16 fine-tuning, verify that your server supports BF16. (Only Cooper Lake & Sapphire Rapids CPUs support BF16.)
+
+Add `--bf16_ipex_ft` at the end of the command:
+
+```
+bash fine-tuning/run_dist.sh -np 2 -ppn 2 bash fine-tuning/run_ipex_native.sh --bf16_ipex_ft 1
+```
+
diff --git a/docs/fine-tuning/multi-nodes-stock-pytorch.md b/docs/fine-tuning/multi-nodes-stock-pytorch.md
new file mode 100644
index 0000000..10e4320
--- /dev/null
+++ b/docs/fine-tuning/multi-nodes-stock-pytorch.md
@@ -0,0 +1,44 @@
+# How to Run DLSA Multi-Node Fine-Tuning with Stock PyTorch (FP32)
+
+## Install the MPI library
+
+Install MPI from [here](https://anaconda.org/intel/impi_rt).
+
+
+Intel MPI is also included in the Intel oneAPI toolkit; installing it through a package manager is recommended.
+
+> Note: This step should be performed on all worker nodes.
+
+## To run:
+
+```
+source /opt/intel/oneapi/mpi/latest/env/vars.sh
+cd profiling-transformers
+```
+
+> Note:
+>
+> np: the total number of processes to launch across the cluster
+>
+> ppn: processes per node, i.e. how many processes run on each worker node
+>
+> For example, to run on 2 nodes with 1 process per node, use `-np 2 -ppn 1`;
+>
+> to run on 4 nodes with 2 processes per node, use `-np 8 -ppn 2`.
+
+### Running a single process on a single node
+
+```
+bash fine-tuning/run_dist.sh -np 1 -ppn 1 bash fine-tuning/run_ipex_native.sh
+```
+
+### Running multi-node fine-tuning
+
+> You need to create a `hostfile` listing all the nodes you want to run on, and set up password-less SSH login between them.
+
+```
+bash fine-tuning/run_dist.sh -np 2 -ppn 1 -f hostfile bash fine-tuning/run_ipex_native.sh
+```
+
+
+
diff --git a/docs/fine-tuning/single-node-ipex.md b/docs/fine-tuning/single-node-ipex.md
new file mode 100644
index 0000000..4baf9cd
--- /dev/null
+++ b/docs/fine-tuning/single-node-ipex.md
@@ -0,0 +1,28 @@
+# How to Run DLSA Single Node Fine-Tuning with IPEX (FP32, BF16)
+
+## Running on CPU
+
+### Single node
+
+```
+./fine-tuning/train_native.sh
+```
+
+By default, it launches 1 instance to run fine-tuning on the SST-2 dataset with FP32 precision. You can change the configurations in the file or pass parameters when running the script.
+
+Below is the help message printed by the command `./fine-tuning/train_native.sh -h`:
+
+```markdown
+Usage: ./fine-tuning/train_native.sh [OPTIONS]
+OPTION includes:
+    -l | --log_name - the log name of this round
+    -d | --dataset - [imdb|sst2] whether to use imdb or sst2 DATASET
+    -b | --batch_size - batch size per instance
+    -s | --sequence_len - max sequence length
+    --bf16_ipex_ft - whether to use bf16_ipex_ft precision
+    --fp32_ipex_ft - whether to use fp32_ipex_ft precision
+    -h | --help - displays this message
+```
+
+
+
diff --git a/docs/fine-tuning/single-node-stock-pytorch.md b/docs/fine-tuning/single-node-stock-pytorch.md
new file mode 100644
index 0000000..8b5f42f
--- /dev/null
+++ b/docs/fine-tuning/single-node-stock-pytorch.md
@@ -0,0 +1,26 @@
+# How to Run DLSA Single Node Fine-Tuning Pipeline with Stock PyTorch
+
+## Running on CPU
+
+### Single node
+
+```
+./fine-tuning/train_native.sh
+```
+
+By default, it launches 1 instance to run fine-tuning on the SST-2 dataset with FP32 precision. You can change the configurations in the file or pass parameters when running the script.
+
+Below is the help message printed by the command `./fine-tuning/train_native.sh -h`:
+
+```markdown
+Usage: ./fine-tuning/train_native.sh [OPTIONS]
+OPTION includes:
+    -l | --log_name - the log name of this round
+    -d | --dataset - [imdb|sst2] whether to use imdb or sst2 DATASET
+    -b | --batch_size - batch size per instance
+    -s | --sequence_len - max sequence length
+    ~~--bf16_ipex_ft - whether to use bf16_ipex_ft precision~~
+    ~~--fp32_ipex_ft - whether to use fp32_ipex_ft precision~~
+    -h | --help - displays this message
+```
+
diff --git a/docs/fine-tuning/single-node-trainer.md b/docs/fine-tuning/single-node-trainer.md
new file mode 100644
index 0000000..11da772
--- /dev/null
+++ b/docs/fine-tuning/single-node-trainer.md
@@ -0,0 +1,28 @@
+# How to Run DLSA Single Node Fine-Tuning with Trainer (FP32, BF16)
+
+## Running on CPU
+
+### Single node
+
+```
+./fine-tuning/train_trainer.sh
+```
+
+By default, it launches 1 instance to run fine-tuning on the SST-2 dataset with FP32 precision. You can change the configurations in the file or pass parameters when running the script.
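+For example, a run that overrides the defaults could look like the following sketch (the flag names are taken from the help message below; the values are only illustrative):
+
+```
+# Illustrative only: fine-tune on IMDB with batch size 16 and 512-token sequences,
+# using HF bf16 mixed precision together with IPEX
+./fine-tuning/train_trainer.sh --dataset imdb --batch_size 16 --sequence_len 512 --bf16 --use_ipex
+```
+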
+
+Below is the help message printed by the command `./fine-tuning/train_trainer.sh -h`:
+
+```markdown
+Usage: ./fine-tuning/train_trainer.sh [OPTIONS]
+OPTION includes:
+    -l | --log_name - the log name of this round
+    -d | --dataset - [imdb|sst2] whether to use imdb or sst2 DATASET
+    -b | --batch_size - batch size per instance
+    -s | --sequence_len - max sequence length
+    --bf16 - whether using hf bf16 inference
+    --use_ipex - whether using ipex
+    -h | --help - displays this message
+```
+
+
+
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..61f8f2d
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,69 @@
+# Welcome to DLSA Pages
+
+DLSA is an Intel-optimized, representative end-to-end fine-tuning and inference pipeline for document-level sentiment analysis, built on a BERT model implemented with the Hugging Face Transformers API.
+
+![DLSA workflow](assets/images/DLSA_workflow.PNG)
+
+## Prerequisites
+### Download the repo
+
+```
+#download the repo
+git clone https://github.com/intel/document-level-sentiment-analysis.git
+cd document-level-sentiment-analysis/profiling-transformers
+git checkout v1.0.0
+```
+
+### Download the datasets:
+
+```
+mkdir datasets
+cd datasets
+#download and extract SST-2 dataset
+wget https://dl.fbaipublicfiles.com/glue/data/SST-2.zip && unzip SST-2.zip && mv SST-2 sst
+#download and extract IMDB dataset
+wget http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz && tar -zxf aclImdb_v1.tar.gz
+```
+> Note: Make sure your network connection is working so the datasets can be downloaded.
+
+## Deploy the test environment
+### Download Miniconda and install it
+
+```
+wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
+sh Miniconda3-latest-Linux-x86_64.sh
+```
+
+> Note: If you have already installed conda on your system, just skip this step.
+
+### Prepare the conda environment for DLSA
+
+```
+conda create -n dlsa python=3.8 --yes
+conda activate dlsa
+sh install.sh
+```
+
+## Running DLSA Inference Pipeline
+
+| Implementations | Model | API | Framework | Precision |
+| -------------------------------------------------------- | -------- | ----------- | -------------- | ---------------- |
+| [Run with HF Transformers](inference/hf-transformers.md) | HF Model | Trainer | PyTorch + IPEX | FP32, BF16 |
+| [Run with Stock PyTorch](inference/stock-pytorch.md) | HF Model | Non-trainer | PyTorch | FP32 |
+| [Run with IPEX](inference/ipex.md) | HF Model | Non-trainer | PyTorch + IPEX | FP32, BF16, INT8 |
+
+## Running DLSA Fine-Tuning Pipeline
+
+### Single Node Fine-Tuning
+
+| Implementations | Model | Instance | API | Framework | Precision |
+| ---------------------------------- | -------- | -------- | ----------- | ----------------------- | ---------- |
+| [Run with HF Transformers + IPEX](fine-tuning/single-node-trainer.md) | HF Model | Single | Trainer | PyTorch + IPEX | FP32, BF16 |
+| [Run with Stock PyTorch](fine-tuning/single-node-stock-pytorch.md) | HF Model | Single | Non-trainer | PyTorch | FP32 |
+| [Run with IPEX (Single Instance)](fine-tuning/single-node-ipex.md) | HF Model | Single | Non-trainer | PyTorch + IPEX | FP32, BF16 |
+| [Run with IPEX (Multi Instance)](fine-tuning/multi-nodes-ipex.md) | HF Model | Multiple | Non-trainer | PyTorch + IPEX | FP32, BF16 |
+
+
+## Issue Tracking
+E2E DLSA tracks both bugs and enhancement requests using [GitHub](https://github.com/intel/document-level-sentiment-analysis/issues).
We welcome input; however, before filing a request, please search the GitHub issue database first.
diff --git a/docs/inference/hf-transformers.md b/docs/inference/hf-transformers.md
new file mode 100644
index 0000000..cf73841
--- /dev/null
+++ b/docs/inference/hf-transformers.md
@@ -0,0 +1,59 @@
+# How to Run DLSA Inference Pipeline with HF Transformers (FP32, BF16)
+
+## Support Matrix
+
+|Category | Script |
+|---|---|
+|CPU Single Instance | cpu_single_instance.sh |
+|CPU Multi Instances | cpu_multi_instance.sh |
+
+> Note: Please use a fine-tuned model to get correct accuracy. Change `MODEL_NAME_OR_PATH` in the script before you run it. By default, `MODEL_NAME_OR_PATH` is `bert-large-uncased`, which is downloaded from the Hugging Face website.
+
+## Running on CPU
+
+### Single instance
+
+```
+./inference/cpu_single_instance.sh
+```
+
+By default, it launches 1 instance to run inference on the SST-2 dataset with FP32 precision. You can change the configurations in the file or pass parameters when running the script.
+
+Below is the help message printed by the command `./inference/cpu_single_instance.sh -h`:
+
+```markdown
+Usage: ./inference/cpu_single_instance.sh [OPTIONS]
+OPTION includes:
+    -l | --log_name - the log name of this round
+    -d | --dataset - [imdb|sst2] whether to use imdb or sst2 DATASET
+    -b | --batch_size - batch size per instance
+    -s | --sequence_len - max sequence length
+    --bf16 - whether using hf bf16 inference
+    --use_ipex - whether using ipex
+    -h | --help - displays this message
+```
+
+
+
+### Multi-instance
+
+```
+./inference/cpu_multi_instance.sh
+```
+
+By default, it launches 2 instances (1 instance/socket) to run inference on the SST-2 dataset with FP32 precision. You can change the configurations in the file or pass parameters when running the script.
+
+Below is the help message printed by the command `./inference/cpu_multi_instance.sh -h`:
+
+```markdown
+Usage: ./inference/cpu_multi_instance.sh [OPTIONS]
+OPTION includes:
+    -l | --log_name - the log name of this round
+    -d | --dataset - [imdb|sst2] whether to use imdb or sst2 DATASET
+    -n | --num_of_ins_per_socket - number of instances per socket
+    -b | --batch_size - batch size per instance
+    -s | --sequence_len - max sequence length
+    --bf16 - whether using hf bf16 inference
+    --use_ipex - whether using ipex
+    -h | --help - displays this message
+```
diff --git a/docs/inference/ipex.md b/docs/inference/ipex.md
new file mode 100644
index 0000000..e91c5df
--- /dev/null
+++ b/docs/inference/ipex.md
@@ -0,0 +1,65 @@
+# How to Run DLSA Inference Pipeline with IPEX (FP32, BF16, INT8)
+
+## Support Matrix
+
+| Category | Script |
+| ------------------- | ------------------ |
+| CPU Single Instance | single_instance.sh |
+| CPU Multi Instances | multi_instance.sh |
+
+> Note: Please use a fine-tuned model to get correct accuracy. Change `MODEL_NAME_OR_PATH` in the script before you run it. By default, `MODEL_NAME_OR_PATH` is `bert-large-uncased`, which is downloaded from the Hugging Face website.
+
+## Running on CPU
+
+> Note: For INT8 inference, you need to quantize the model first. See the details at: https://github.com/IntelAI/models/tree/master/quickstart/language_modeling/pytorch/bert_large/inference/cpu
+
+### Single instance
+
+```
+./inference/single_instance.sh
+```
+
+By default, it launches 1 instance to run inference on the SST-2 dataset with FP32 precision.
You can change the configurations in the file or pass parameters when running the script.
+
+Below is the help message printed by the command `./inference/single_instance.sh -h`:
+
+```markdown
+Usage: ./inference/single_instance.sh [OPTIONS]
+OPTION includes:
+    -l | --log_name - the log name of this round
+    -d | --dataset - [imdb|sst2] whether to use imdb or sst2 DATASET
+    -b | --batch_size - batch size per instance
+    -s | --sequence_len - max sequence length
+    --ipex_fp32 - whether to use ipex_fp32 precision
+    --ipex_bf16 - whether to use ipex_bf16 precision
+    --int8 - whether to use int8 precision
+    --int8_bf16 - whether to use int8_bf16 precision
+    -h | --help - displays this message
+```
+
+
+
+### Multi-instance
+
+```
+./inference/multi_instance.sh
+```
+
+By default, it launches 2 instances (1 instance/socket) to run inference on the SST-2 dataset with FP32 precision. You can change the configurations in the file or pass parameters when running the script.
+
+Below is the help message printed by the command `./inference/multi_instance.sh -h`:
+
+```markdown
+Usage: ./inference/multi_instance.sh [OPTIONS]
+OPTION includes:
+    -l | --log_name - the log name of this round
+    -d | --dataset - [imdb|sst2] whether to use imdb or sst2 DATASET
+    -n | --num_of_ins_per_socket - number of instances per socket
+    -b | --batch_size - batch size per instance
+    -s | --sequence_len - max sequence length
+    --ipex_fp32 - whether to use ipex_fp32 precision
+    --ipex_bf16 - whether to use ipex_bf16 precision
+    --int8 - whether to use int8 precision
+    --int8_bf16 - whether to use int8_bf16 precision
+    -h | --help - displays this message
+```
diff --git a/docs/inference/stock-pytorch.md b/docs/inference/stock-pytorch.md
new file mode 100644
index 0000000..2525fb6
--- /dev/null
+++ b/docs/inference/stock-pytorch.md
@@ -0,0 +1,64 @@
+# How to Run DLSA Inference Pipeline with Stock PyTorch
+
+## Support Matrix
+
+|Category | Script |
+|---|---|
+|CPU Single Instance | single_instance.sh |
+|CPU Multi Instances | multi_instance.sh |
+
+> Note: Please use a fine-tuned model to get correct accuracy. Change `MODEL_NAME_OR_PATH` in the script before you run it. By default, `MODEL_NAME_OR_PATH` is `bert-large-uncased`, which is downloaded from the Hugging Face website.
+
+## Running on CPU
+
+### Single instance
+
+```
+./inference/single_instance.sh
+```
+
+By default, it launches 1 instance to run inference on the SST-2 dataset with FP32 precision. You can change the configurations in the file or pass parameters when running the script.
+
+Below is the help message printed by the command `./inference/single_instance.sh -h`:
+
+```markdown
+Usage: ./inference/single_instance.sh [OPTIONS]
+OPTION includes:
+    -l | --log_name - the log name of this round
+    -d | --dataset - [imdb|sst2] whether to use imdb or sst2 DATASET
+    -b | --batch_size - batch size per instance
+    -s | --sequence_len - max sequence length
+    ~~--ipex_fp32 - whether to use ipex_fp32 precision~~
+    ~~--ipex_bf16 - whether to use ipex_bf16 precision~~
+    ~~--int8 - whether to use int8 precision~~
+    ~~--int8_bf16 - whether to use int8_bf16 precision~~
+    -h | --help - displays this message
+```
+
+
+
+### Multi-instance
+
+```
+./inference/multi_instance.sh
+```
+
+By default, it launches 2 instances (1 instance/socket) to run inference on the SST-2 dataset with FP32 precision. You can change the configurations in the file or pass parameters when running the script.
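+For instance, overriding the defaults from the command line might look like the following sketch (flag names come from the help message below; the values are illustrative only):
+
+```
+# Illustrative only: run IMDB inference with 2 instances per socket,
+# batch size 16, and 512-token sequences
+./inference/multi_instance.sh --dataset imdb --num_of_ins_per_socket 2 --batch_size 16 --sequence_len 512
+```
+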
+ +Below is the help message by using the command `./inference/multi_instance.sh -h` + +```markdown +Usage: ./inference/multi_instance.sh [OPTIONS] +OPTION includes: + -l | --log_name - the log name of this round + -d | --dataset - [imdb|sst2] wether to use imdb or sst2 DATASET + -n | --num_of_ins_per_socket - number of instance per socket + -b | --batch_size - batch size per instance + -s | --sequence_len - max sequence length + ~~--ipex_fp32 - wether to use ipex_fp32 precision~~ + ~~--ipex_bf16 - wether to use ipex_bf16 precision~~ + ~~--int8 - wether to use int8 precision~~ + ~~--int8_bf16 - wether to use int8_bf16 precision~~ + -h | --help - displays this message +``` + diff --git a/profiling-transformers/.gitignore b/profiling-transformers/.gitignore new file mode 100644 index 0000000..1831872 --- /dev/null +++ b/profiling-transformers/.gitignore @@ -0,0 +1,149 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + + +############## +runs/ +.vscode/ +output/ +logs/ + +*.gz.* + +api_token.txt +fine_tuned/ +bert-base-uncased-tf +output-tf +fine_tuned-tf/ +few_shot_* +traced_bert.pt +results/ +models/ +wandb/ \ No newline at end of file diff --git a/profiling-transformers/deploy/install_torch_ccl.sh b/profiling-transformers/deploy/install_torch_ccl.sh new file mode 100755 index 0000000..3fa177d --- /dev/null +++ b/profiling-transformers/deploy/install_torch_ccl.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright (C) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# + +# + +GCC_GOOD=`gcc --version | awk '/gcc/ && ($3+0)>=8.3{print "1"}'` +if [ "x$GCC_GOOD" != "x1" ] ; then + echo "Requires gcc version later than 8.3.0" + exit 1 +fi + +pt_version=$(python -c "import torch; print(torch.__version__)" 2> /dev/null) +if [ "x$pt_version" == "x" ] ; then + echo "Can't find pytorch version, need PyTorch 1.9 or higher..." + exit 1 +fi + +branch=$(echo $pt_version | tr "." " " | awk '{print "ccl_torch" $1 "." $2}') + +if ! test -d ./torch-ccl ; then + git clone https://github.com/intel/torch-ccl.git +fi +cd torch-ccl +# workaround to disable linker error for linking to mkl libraries +# export CMAKE_FIND_DEBUG_MODE=ON +export CMAKE_DISABLE_FIND_PACKAGE_MKL=TRUE +git checkout $branch && git submodule sync && git submodule update --init --recursive && CC=gcc CXX=g++ CMAKE_C_COMPILER=gcc CMAKE_CXX_COMPILER=g++ python setup.py install + diff --git a/profiling-transformers/fine-tuning/run_dist.sh b/profiling-transformers/fine-tuning/run_dist.sh new file mode 100755 index 0000000..8d2e94d --- /dev/null +++ b/profiling-transformers/fine-tuning/run_dist.sh @@ -0,0 +1,202 @@ +#!/bin/bash +# Copyright (C) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# + +# + +function print_vars { + for VAR in ${!CCL*} ${!I_MPI*} ${!i_mpi*} ${!KMP_*} ${!OMP_*} ${!ATL_*} LD_PRELOAD ${!DLRM_*} ${!PYTORCH_*} ${!PCL_*} ${!LIBXSMM_*} ${!EMULATE_*} DATALOADER_WORKER_COUNT VIRTUAL_ENV ${!ARGS_*} $@ ; do + if ! test -z ${!VAR} ; then + echo "Using $VAR=${!VAR}" + fi + done +} + +SINGLE_SOCKET_ONLY=0 + +while (( "$#" )); do + case "$1" in + -n|-np) + ARGS_NTASKS=$2 + shift 2 + ;; + -ppn) + ARGS_PPN=$2 + shift 2 + ;; + -f) + ARGS_HOSTFILE=$2 + shift 2 + ;; + -sso) + SINGLE_SOCKET_ONLY=1 + shift + ;; + --) # end argument parsing + shift + break + ;; + -*|--*=) # unsupported flags + echo "Error: Unsupported flag $1" >&2 + exit 1 + ;; + *) # preserve positional arguments + break + ;; + esac +done + +if ! test -z $SLURM_JOB_ID ; then + PREFIX="srun -n 1 -N 1 " +else + PREFIX= +fi + +if ! test -z $ARGS_HOSTFILE ; then + if ! test -f $ARGS_HOSTFILE ; then + echo "Hostfile $ARGS_HOSTFILE does not exist!" 
; exit 1 + else + OPT_HOSTFILE="-f $ARGS_HOSTFILE" + PREFIX="mpiexec.hydra -np 1 -ppn 1 -f $ARGS_HOSTFILE" + fi +fi + +CORES_PER_SOCKET=`$PREFIX lscpu | grep "Core(s) per socket" | awk '{print $NF}'` +NUM_SOCKETS=`$PREFIX lscpu | grep "Socket(s):" | awk '{print $NF}'` +NUM_NUMA_NODES=`$PREFIX lscpu | grep "NUMA node(s):" | awk '{print $NF}'` +THREADS_PER_CORE=`$PREFIX lscpu | grep "Thread(s) per core:" | awk '{print $NF}'` + +NNODES=1 +NP=1 +if [ $SINGLE_SOCKET_ONLY -eq 1 ] ; then +PPN=1 +else +PPN=$NUM_NUMA_NODES +fi + +if ! test -z $SLURM_NNODES ; then NNODES=$SLURM_NNODES ; fi +if ! test -z $SLURM_NTASKS ; then NP=$SLURM_NTASKS ; fi +if ! test -z $SLURM_NNODES && ! test -z $SLURM_NTASKS ; then PPN=$(( SLURM_NTASKS / SLURM_NNODES )) ; fi +if ! test -z $ARGS_NTASKS ; then NP=$ARGS_NTASKS ; fi +if ! test -z $ARGS_HOSTFILE ; then + NNODES=`cat $ARGS_HOSTFILE | sort -u | wc -l` +fi + +if ! test -z $ARGS_PPN ; then + PPN=$ARGS_PPN +fi +REAL_NNODES=$(( (NP + PPN - 1) / PPN )) +if [[ $REAL_NNODES -lt $NNODES ]] ; then NNODES=$REAL_NNODES ; fi + +if [ $(( NP % NNODES )) -ne 0 ] ; then + echo "Number of tasks ($NP) not multiple of number of nodes ($NNODES), exiting..." + exit 1 +fi + +PPN=$(( NP / NNODES )) + +echo "Running $NP tasks on $NNODES nodes with ppn=$PPN" + + +OPT_PPN="-ppn $PPN " + +if [ $SINGLE_SOCKET_ONLY -eq 1 ] ; then + NUM_THREADS=$(( CORES_PER_SOCKET / PPN )) +else + NUM_THREADS=$(( CORES_PER_SOCKET * NUM_SOCKETS / PPN )) +fi + +if [ "x${DATALOADER_WORKER_COUNT}" == "x" ] ; then +DATALOADER_WORKER_COUNT=0 +fi + +if [ $NP == 1 ] ; then +export CCL_WORKER_COUNT=0 +else +if [ "x${CCL_WORKER_COUNT}" == "x" ] ; then +export CCL_WORKER_COUNT=1 +fi +fi +CCL_WORKER_AFFINITY="" +PYTORCH_MPI_THREAD_AFFINITY="" + +NUM_RESV_THREADS=$(( CCL_WORKER_COUNT + DATALOADER_WORKER_COUNT )) +NUM_WORKER_THREADS=$(( NUM_THREADS - NUM_RESV_THREADS )) +USE_BC=1 +if ! which bc >& /dev/null ; then USE_BC=0 ; fi +for I in 0 1 2 3 ; do +SHFT=$(( NUM_RESV_THREADS + I )) +if [ $USE_BC -eq 1 ] ; then +PROC_MASK_STR[$I]=`BC_LINE_LENGTH=0 bc <<<"obase=16;(2^${NUM_WORKER_THREADS} - 1)*(2^${SHFT} )"` +else +PROC_MASK=$(( ( ( 1 << NUM_WORKER_THREADS ) - 1 ) << SHFT )) +PROC_MASK_STR[$I]=`printf "%X" $PROC_MASK` +fi +#echo "PROC_MASK_STR $I = ${PROC_MASK_STR[$I]}" +done +MASKS=( ) +for(( I=0; I < PPN ; I++)) ; do + SHFT=$(( I * NUM_THREADS )) + IND=$(( SHFT % 4 )) + if [ $SHFT -lt 4 ] ; then + ZEROS="" + else + ZEROS=`printf "%0*X" $(( SHFT / 4 ))` + fi + SMASK=${PROC_MASK_STR[$IND]}${ZEROS} + MASKS[$I]="0x$SMASK" + for((P=0;P < CCL_WORKER_COUNT ; P++)); do CCL_WORKER_AFFINITY="${CCL_WORKER_AFFINITY} $(( I * NUM_THREADS + P ))" ; done + PYTORCH_MPI_THREAD_AFFINITY="${PYTORCH_MPI_THREAD_AFFINITY} $(( I * NUM_THREADS ))" +done +export I_MPI_PIN_DOMAIN=[`echo ${MASKS[@]} | tr " " ","`] +export CCL_WORKER_AFFINITY=`echo ${CCL_WORKER_AFFINITY} | tr " " ","` +export OMP_NUM_THREADS=$(( NUM_THREADS - CCL_WORKER_COUNT - DATALOADER_WORKER_COUNT )) +export PYTORCH_MPI_THREAD_AFFINITY=`echo ${PYTORCH_MPI_THREAD_AFFINITY} | tr " " ","` + +which python icc gcc mpicc mpiexec.hydra 2> /dev/null + +echo "#### INITIAL ENV ####" +print_vars +echo "#### INITIAL ENV ####" + +echo "PyTorch version: `python -c "import torch; print(torch.__version__)" 2> /dev/null`" + +if ! 
test -z $SLURM_JOB_ID ; then +srun hostname | sort -u +fi + +export MASTER_ADDR=`$PREFIX hostname` +export MASTER_PORT=29500 +echo "MASTER_ADDR=$MASTER_ADDR" + +CMD=$1 +shift +ARGS="$@" + +MPIEXE_ARGS="-np $NP $OPT_PPN $OPT_HOSTFILE -l -genv I_MPI_PIN_DOMAIN=$I_MPI_PIN_DOMAIN -genv CCL_WORKER_AFFINITY=$CCL_WORKER_AFFINITY -genv CCL_WORKER_COUNT=$CCL_WORKER_COUNT -genv OMP_NUM_THREADS=$OMP_NUM_THREADS " + +#echo "Running mpiexec.hydra ${MPIEXE_ARGS} $CMD $@" +eval set -- "${MPIEXE_ARGS} hostname" +mpiexec.hydra $@ | sort +eval set -- "${MPIEXE_ARGS} $CMD $ARGS" +echo "Running mpiexec.hydra $@" +echo "Start Time: `date`" +SECONDS=0 +#mpiexec.hydra ${MPIEXE_ARGS} ${CMD} $@ +mpiexec.hydra $@ +echo "End Time: `date`" +duration=$SECONDS +echo "Total Time: $(($duration / 60)) min and $(($duration % 60)) sec" + diff --git a/profiling-transformers/fine-tuning/run_ipex_native.sh b/profiling-transformers/fine-tuning/run_ipex_native.sh new file mode 100755 index 0000000..4e41abf --- /dev/null +++ b/profiling-transformers/fine-tuning/run_ipex_native.sh @@ -0,0 +1,38 @@ +# Copyright (C) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# + +# + +# export CUDA_VISIBLE_DEVICES="-1"; \ +MODEL_NAME_OR_PATH="${MODEL_NAME_OR_PATH:-bert-large-uncased}" +DATASET="${DATASET:-sst2}" +MAX_SEQ_LEN=55 +NUM_TRAIN_EPOCHS=1 +OUTPUT_DIR="${OUTPUT_DIR:-fine_tuned}" +TRAINNING_BS=32 +INFERENCE_BS=8 + #--bf16_ft \ +python src/run_pt_native_ft.py \ + --model_name_or_path $MODEL_NAME_OR_PATH \ + --dataset $DATASET \ + --num_train_epochs $NUM_TRAIN_EPOCHS \ + --max_seq_len $MAX_SEQ_LEN \ + --output_dir $OUTPUT_DIR \ + --do_train \ + --per_device_train_batch_size $TRAINNING_BS \ + --do_predict \ + --per_device_eval_batch_size $INFERENCE_BS \ + --logging_strategy epoch \ + $@ diff --git a/profiling-transformers/fine-tuning/train_native.sh b/profiling-transformers/fine-tuning/train_native.sh new file mode 100755 index 0000000..d22fd65 --- /dev/null +++ b/profiling-transformers/fine-tuning/train_native.sh @@ -0,0 +1,114 @@ +# Copyright (C) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. 
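+# train_native.sh: single-node DLSA fine-tuning without the HF Trainer (runs src/run_pt_native.py).
+# Illustrative usage (defaults below: sst2, batch size 32, max sequence length 55):
+#   ./fine-tuning/train_native.sh -d sst2 -b 32 -s 55 [--fp32_ipex_ft | --bf16_ipex_ft]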
+# + +# + +export LOG_NAME=`date "+%m%d-%H%M"` +export DATASET="sst2" +export BATCH_SIZE=32 +export SEQUENCE_LEN=55 +export BF16_IPEX_FT=0 +export FP32_IPEX_FT=0 +export TRAIN_EPOCH=1 +export MODEL_NAME_OR_PATH="bert-large-uncased" +export OUTPUT_DIR="${OUTPUT_DIR:-./logs}" + +while [ "$1" != "" ]; +do + case $1 in + -l | --log_name ) + shift + LOG_NAME="$1" + echo "log name is $LOG_NAME" + ;; + -d | --dataset ) + shift + DATASET="$1" + echo "dataset is : $DATASET" + ;; + -b | --batch_size ) + shift + BATCH_SIZE="$1" + echo "batch size per instance is : $BATCH_SIZE" + ;; + -s | --sequence_len ) + shift + SEQUENCE_LEN="$1" + echo "sequence_len is : $SEQUENCE_LEN" + ;; + --bf16_ipex_ft ) + BF16_IPEX_FT=1 + echo "bf16_ipex_ft is : $BF16_IPEX_FT" + ;; + --fp32_ipex_ft ) + FP32_IPEX_FT=1 + echo "fp32_ipex_ft is : $FP32_IPEX_FT" + ;; + -h | --help ) + echo "Usage: ./train_native.sh [OPTIONS]" + echo "OPTION includes:" + echo " -l | --log_name - the log name of this round" + echo " -d | --dataset - [imdb|sst2] wether to use imdb or sst2 DATASET" + echo " -b | --batch_size - batch size per instance" + echo " -s | --sequence_len - max sequence length" + echo " --bf16_ipex_ft - wether to use bf16_ipex_ft precision" + echo " --fp32_ipex_ft - wether to use fp32_ipex_ft precision" + echo " -h | --help - displays this message" + exit + ;; + * ) + echo "Invalid option: $1" + echo "Usage: train_native.sh [OPTIONS]" + echo "OPTION includes:" + echo " -l | --log_name - the log name of this round" + echo " -d | --dataset - [imdb|sst2] wether to use imdb or sst2 DATASET" + echo " -b | --batch_size - batch size per instance" + echo " -s | --sequence_len - max sequence length" + echo " --bf16_ipex_ft - wether to use bf16_ipex_ft precision" + echo " --fp32_ipex_ft - wether to use fp32_ipex_ft precision" + exit + ;; + esac + shift +done + +if [ -z "$LOG_NAME" ]; then + pre=`date "+%m%d-%H%M"` +else + pre=$LOG_NAME +fi + +OUTPUT_DIR=$OUTPUT_DIR'/'$pre'/'$DATASET +echo $OUTPUT_DIR + +mkdir -p $OUTPUT_DIR + + +export CUDA_VISIBLE_DEVICES="-1"; \ +python ./src/run_pt_native.py \ + --model_name_or_path $MODEL_NAME_OR_PATH \ + --dataset $DATASET \ + --bf16_ipex_ft $BF16_IPEX_FT \ + --fp32_ipex_ft $FP32_IPEX_FT \ + --output_dir $OUTPUT_DIR/output_test \ + --max_seq_len $SEQUENCE_LEN \ + --num_train_epochs $TRAIN_EPOCH \ + --do_train \ + --per_device_train_batch_size $BATCH_SIZE \ + --do_predict \ + --per_device_eval_batch_size 8 \ + 2>&1 | tee $OUTPUT_DIR/test_$i.log + + diff --git a/profiling-transformers/fine-tuning/train_trainer.sh b/profiling-transformers/fine-tuning/train_trainer.sh new file mode 100755 index 0000000..ece76b0 --- /dev/null +++ b/profiling-transformers/fine-tuning/train_trainer.sh @@ -0,0 +1,115 @@ +# Copyright (C) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. 
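+# train_trainer.sh: single-node DLSA fine-tuning through the HF Trainer API (runs src/run_pt.py).
+# Illustrative usage (defaults below: sst2, batch size 32, max sequence length 55):
+#   ./fine-tuning/train_trainer.sh -d sst2 -b 32 -s 55 [--bf16] [--use_ipex]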
+# + +# + +export LOG_NAME=`date "+%m%d-%H%M"` +export DATASET="sst2" +export BATCH_SIZE=32 +export SEQUENCE_LEN=55 +export BF16="" +export USE_IPEX="" +export TRAIN_EPOCH=1 +export MODEL_NAME_OR_PATH="bert-large-uncased" +export OUTPUT_DIR="${OUTPUT_DIR:-./logs}" + +while [ "$1" != "" ]; +do + case $1 in + -l | --log_name ) + shift + LOG_NAME="$1" + echo "log name is $LOG_NAME" + ;; + -d | --dataset ) + shift + DATASET="$1" + echo "dataset is : $DATASET" + ;; + -b | --batch_size ) + shift + BATCH_SIZE="$1" + echo "batch size per instance is : $BATCH_SIZE" + ;; + -s | --sequence_len ) + shift + SEQUENCE_LEN="$1" + echo "sequence_len is : $SEQUENCE_LEN" + ;; + --bf16 ) + BF16="--bf16" + echo "use bf16" + ;; + --use_ipex ) + USE_IPEX=1 + echo "use_ipex is : $USE_IPEX" + ;; + -h | --help ) + echo "Usage: ./fine-tuning/train_trainer.sh [OPTIONS]" + echo "OPTION includes:" + echo " -l | --log_name - the log name of this round" + echo " -d | --dataset - [imdb|sst2] wether to use imdb or sst2 DATASET" + echo " -b | --batch_size - batch size per instance" + echo " -s | --sequence_len - max sequence length" + echo " --bf16 - whether using hf bf16 inference" + echo " --use_ipex - whether using ipex" + echo " -h | --help - displays this message" + exit + ;; + * ) + echo "Invalid option: $1" + echo "Usage: ./fine-tuning/train_trainer.sh [OPTIONS]" + echo "OPTION includes:" + echo " -l | --log_name - the log name of this round" + echo " -d | --dataset - [imdb|sst2] wether to use imdb or sst2 DATASET" + echo " -b | --batch_size - batch size per instance" + echo " -s | --sequence_len - max sequence length" + echo " --bf16 - whether using hf bf16 inference" + echo " --use_ipex - whether using ipex" + exit + ;; + esac + shift +done + +if [ -z "$LOG_NAME" ]; then + pre=`date "+%m%d-%H%M"` +else + pre=$LOG_NAME +fi + +OUTPUT_DIR=$OUTPUT_DIR'/'$pre'/'$DATASET +echo $OUTPUT_DIR + +mkdir -p $OUTPUT_DIR + + +export CUDA_VISIBLE_DEVICES="-1"; \ +python ./src/run_pt.py \ + --model_name_or_path $MODEL_NAME_OR_PATH \ + --dataset $DATASET \ + --output_dir $OUTPUT_DIR/output_test \ + --max_seq_len $SEQUENCE_LEN \ + --num_train_epochs $TRAIN_EPOCH \ + --do_train \ + --per_device_train_batch_size $BATCH_SIZE \ + --do_predict \ + --per_device_eval_batch_size 8 \ + --no_cuda \ + $BF16 \ + $USE_IPEX \ + 2>&1 | tee $OUTPUT_DIR/test_$i.log + + diff --git a/profiling-transformers/inference/cpu_multi_instance.sh b/profiling-transformers/inference/cpu_multi_instance.sh new file mode 100755 index 0000000..807ed99 --- /dev/null +++ b/profiling-transformers/inference/cpu_multi_instance.sh @@ -0,0 +1,193 @@ +# Copyright (C) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. 
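+# cpu_multi_instance.sh: multi-instance DLSA inference, pinning each instance to a core range and
+# NUMA node via numactl (runs src/run_pt.py). Illustrative usage (defaults below: sst2,
+# 1 instance per socket, batch size 8, max sequence length 55):
+#   ./inference/cpu_multi_instance.sh -d sst2 -n 2 -b 8 -s 55 [--bf16] [--use_ipex]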
+# + +# + +export KMP_SETTINGS=1 +export KMP_BLOCKTIME=1 +export OMP_MAX_ACTIVE_LEVELS=1 + +export LOG_NAME=`date "+%m%d-%H%M"` +export DATASET="sst2" +export NUMBER_OF_INSTANCE_PER_SOCKET=1 +export BATCH_SIZE=8 +export SEQUENCE_LEN=55 +export MODEL_NAME_OR_PATH="${MODEL_NAME_OR_PATH:-bert-large-uncased}" +export OUTPUT_DIR="${OUTPUT_DIR:-./logs}" +export USE_IPEX="" +export BF16="" + + +while [ "$1" != "" ]; +do + case $1 in + -l | --log_name ) + shift + LOG_NAME="$1" + echo "log name is $LOG_NAME" + ;; + -d | --dataset ) + shift + DATASET="$1" + echo "dataset is : $DATASET" + ;; + -n | --num_of_ins_per_socket ) + shift + NUMBER_OF_INSTANCE_PER_SOCKET="$1" + echo "number_of_instance_per_socket is : $NUMBER_OF_INSTANCE_PER_SOCKET" + ;; +# -c | --cores_per_instance ) +# shift +# cores_per_instance="$1" +# echo "cores_per_instance is : $cores_per_instance" +# ;; + -b | --batch_size ) + shift + BATCH_SIZE="$1" + echo "batch size per instance is : $BATCH_SIZE" + ;; + -s | --sequence_len ) + shift + SEQUENCE_LEN="$1" + echo "sequence_len is : $SEQUENCE_LEN" + ;; + --use_ipex ) + USE_IPEX="--use_ipex" + echo " use ipex" + ;; + --bf16 ) + BF16="--bf16" + echo "using hf bf16 inference" + ;; + -h | --help ) + echo "Usage: ./inference/cpu_multi_instance.sh [OPTIONS]" + echo "OPTION includes:" + echo " -l | --log_name - the log name of this round" + echo " -d | --dataset - [imdb|sst2] wether to use imdb or sst2 DATASET" + echo " -n | --num_of_ins_per_socket - number of instance per socket" +# echo " -c | --cores_per_instance - cores per instance" + echo " -b | --batch_size - batch size per instance" + echo " -s | --sequence_len - max sequence length" + echo " --bf16 - whether using hf bf16 inference" + echo " --use_ipex - whether using ipex" + echo " -h | --help - displays this message" + exit + ;; + * ) + echo "Invalid option: $1" + echo "Usage: inference/cpu_multi_instance.sh [OPTIONS]" + echo "OPTION includes:" + echo " -l | --log_name - the log name of this round" + echo " -d | --dataset - [imdb|sst2] wether to use imdb or sst2 DATASET" + echo " -n | --num_of_ins_per_socket - number of instance per socket" +# echo " -c | --cores_per_instance - cores per instance" + echo " -b | --batch_size - batch size per instance" + echo " -s | --sequence_len - max sequence length" + echo " --bf16 - whether using hf bf16 inference" + echo " --use_ipex - whether using ipex" + exit + ;; + esac + shift +done + +if [ -z "$LOG_NAME" ]; then + pre=`date "+%m%d-%H%M"` +else + pre=$LOG_NAME +fi + +if [ -z "$DATASET" ]; then + echo "Error: Please enter the DATASET ot use [imdb|sst2]" + exit +elif [ $DATASET != "imdb" -a $DATASET != "sst2" ]; then + echo "Error: The DATASET $DATASET cannot be recognized, please enter 'imdb' or 'sst2'" + exit +fi + +if [ -z "$NUMBER_OF_INSTANCE_PER_SOCKET" ]; then + echo "Error: Please set the instance number per socket using -n or --num_of_ins_per_socket" + exit +fi + +#if [ -z "$cores_per_instance" ]; then +# echo "Please set the core number per instance using -c or --cores_per_instance" +# exit +#fi + +if [ -z "$BATCH_SIZE" ]; then + echo "Error: Please set the batch size per instance using -b or --BATCH_SIZE" + exit +fi + +if [ -z $SEQUENCE_LEN ]; then + if [ $DATASET = 'imdb' ]; then + SEQUENCE_LEN=512 + elif [ $DATASET = 'sst2' ]; then + SEQUENCE_LEN=55 + fi + echo "WARNING: SEQUENCE_LEN is not set, using default DATASET ($DATASET) sequence length $SEQUENCE_LEN" +fi + + +all_core_number=`cat /proc/cpuinfo |grep "processor"|wc -l` +socket_number=`lscpu | grep "Socket(s)" | awk 
'{print $2}'` +core_number_per_socket=$(($all_core_number / $socket_number)) +instance_number=$(($NUMBER_OF_INSTANCE_PER_SOCKET * $socket_number)) + +if [ $(($core_number_per_socket % $NUMBER_OF_INSTANCE_PER_SOCKET)) != 0 ]; then + echo "\`instance_numberi_per_socket($NUMBER_OF_INSTANCE_PER_SOCKET)\` cannot be divisible by \`core_number_per_socket($core_number_per_socket)\`" + exit +else + cores_per_instance=$(($core_number_per_socket / $NUMBER_OF_INSTANCE_PER_SOCKET)) +fi + +if [ $DATASET = 'imdb' ]; then + max_test_samples=$((25000/$instance_number)) +else + max_test_samples=$((872/$instance_number)) +fi + +OUTPUT_DIR=$OUTPUT_DIR'/'$pre'/'$DATASET +echo "log directory is $OUTPUT_DIR" +mkdir -p $OUTPUT_DIR + + +for i in $(seq 1 $instance_number) +do + export OMP_NUM_THREADS=$cores_per_instance + start_index=$(( ($i-1) * $cores_per_instance)) + end_index=$(( ($i * $cores_per_instance) -1)) + mem_bind=$(( $start_index / $core_number_per_socket)) + echo "\`start core index\` is $start_index" + echo "\`end core index \` is $end_index" + echo "\`memory bind\` is $mem_bind" + str="numactl -C $start_index-$end_index -m $mem_bind" + echo $str + nohup numactl -C $start_index-$end_index -m $mem_bind python ./src/run_pt.py \ + --model_name_or_path $MODEL_NAME_OR_PATH \ + --dataset $DATASET \ + --multi_instance \ + --output_dir $OUTPUT_DIR/output_test \ + --do_predict \ + --max_seq_len $SEQUENCE_LEN \ + --instance_index $i \ + --max_test_samples $max_test_samples \ + --per_device_eval_batch_size $BATCH_SIZE \ + --no_cuda \ + $USE_IPEX \ + $BF16 \ + > $OUTPUT_DIR/test_$i.log 2>&1 & +done diff --git a/profiling-transformers/inference/cpu_single_instance.sh b/profiling-transformers/inference/cpu_single_instance.sh new file mode 100755 index 0000000..cb61c4f --- /dev/null +++ b/profiling-transformers/inference/cpu_single_instance.sh @@ -0,0 +1,111 @@ +# Copyright (C) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. 
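To make the core and memory binding above concrete, here is a sketch of what the multi-instance launcher would emit on a hypothetical two-socket machine with 28 cores per socket when asked for two instances per socket; the machine shape is an assumption for illustration only:

```
# ./inference/cpu_multi_instance.sh -n 2 -d sst2
# On an assumed 2-socket, 28-cores-per-socket host this launches 4 instances,
# 14 cores each, bound to the local NUMA node; SST-2's 872 dev samples split
# into 218 per instance. Remaining run_pt.py flags are elided for brevity.
numactl -C 0-13  -m 0 python ./src/run_pt.py --instance_index 1 --max_test_samples 218 ...
numactl -C 14-27 -m 0 python ./src/run_pt.py --instance_index 2 --max_test_samples 218 ...
numactl -C 28-41 -m 1 python ./src/run_pt.py --instance_index 3 --max_test_samples 218 ...
numactl -C 42-55 -m 1 python ./src/run_pt.py --instance_index 4 --max_test_samples 218 ...
```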
+# + +# +# +# export CUDA_VISIBLE_DEVICES="-1"; \ +export LOG_NAME=`date "+%m%d-%H%M"` +export DATASET="sst2" +export BATCH_SIZE=8 +export SEQUENCE_LEN=55 +export MODEL_NAME_OR_PATH="${MODEL_NAME_OR_PATH:-bert-large-uncased}" +export OUTPUT_DIR="${OUTPUT_DIR:-./logs}" +export USE_IPEX="" +export BF16="" + +while [ "$1" != "" ]; +do + case $1 in + -l | --log_name ) + shift + LOG_NAME="$1" + echo "log name is $LOG_NAME" + ;; + -d | --dataset ) + shift + DATASET="$1" + echo "dataset is : $DATASET" + ;; + -b | --batch_size ) + shift + BATCH_SIZE="$1" + echo "batch size per instance is : $BATCH_SIZE" + ;; + -s | --sequence_len ) + shift + SEQUENCE_LEN="$1" + echo "sequence_len is : $SEQUENCE_LEN" + ;; + --use_ipex ) + USE_IPEX="--use_ipex" + echo " use ipex" + ;; + --bf16 ) + BF16="--bf16" + echo "using hf bf16 inference" + ;; + -h | --help ) + echo "Usage: ./inference/cpu_single_instance.sh [OPTIONS]" + echo "OPTION includes:" + echo " -l | --log_name - the log name of this round" + echo " -d | --dataset - [imdb|sst2] wether to use imdb or sst2 DATASET" + echo " -b | --batch_size - batch size per instance" + echo " -s | --sequence_len - max sequence length" + echo " --bf16 - whether using hf bf16 inference" + echo " --use_ipex - whether using ipex" + echo " -h | --help - displays this message" + exit + ;; + * ) + echo "Invalid option: $1" + echo "Usage: ./inference/cpu_single_instance.sh [OPTIONS]" + echo "OPTION includes:" + echo " -l | --log_name - the log name of this round" + echo " -d | --dataset - [imdb|sst2] wether to use imdb or sst2 DATASET" + echo " -b | --batch_size - batch size per instance" + echo " -s | --sequence_len - max sequence length" + echo " --bf16 - whether using hf bf16 inference" + echo " --use_ipex - whether using ipex" + exit + ;; + esac + shift +done + +if [ -z "$LOG_NAME" ]; then + pre=`date "+%m%d-%H%M"` +else + pre=$LOG_NAME +fi + +OUTPUT_DIR=$OUTPUT_DIR'/'$pre'/'$DATASET +echo $OUTPUT_DIR + +mkdir -p $OUTPUT_DIR + + +export CUDA_VISIBLE_DEVICES="-1"; \ +python ./src/run_pt.py \ + --model_name_or_path $MODEL_NAME_OR_PATH \ + --dataset $DATASET \ + --output_dir $OUTPUT_DIR/output_test \ + --do_predict \ + --max_seq_len $SEQUENCE_LEN \ + --per_device_eval_batch_size $BATCH_SIZE \ + --no_cuda \ + $USE_IPEX \ + $BF16 \ + 2>&1 | tee $OUTPUT_DIR/test_$i.log + diff --git a/profiling-transformers/inference/multi_instance.sh b/profiling-transformers/inference/multi_instance.sh new file mode 100755 index 0000000..c5f7965 --- /dev/null +++ b/profiling-transformers/inference/multi_instance.sh @@ -0,0 +1,219 @@ +#!/bin/bash +# Copyright (C) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. 
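For comparison with the multi-instance launcher, a single-instance CPU run of the script below with IPEX and bf16 enabled could be started as follows; the exported values simply restate the script's own defaults and can be omitted:

```
# Optional overrides; the script falls back to bert-large-uncased and ./logs
export MODEL_NAME_OR_PATH=bert-large-uncased
export OUTPUT_DIR=./logs
./inference/cpu_single_instance.sh -d sst2 -b 8 -s 55 --use_ipex --bf16
```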
+# + +# +export KMP_SETTINGS=1 +export KMP_BLOCKTIME=1 +export OMP_MAX_ACTIVE_LEVELS=1 + +export LOG_NAME=`date "+%m%d-%H%M"` +export DATASET="sst2" +export NUMBER_OF_INSTANCE_PER_SOCKET=1 +export BATCH_SIZE=8 +export SEQUENCE_LEN=55 +export IPEX_BF16=0 +export IPEX_FP32=0 +export INT8=0 +export INT8_BF16=0 +export MODEL_NAME_OR_PATH="${MODEL_NAME_OR_PATH:-bert-large-uncased}" +export OUTPUT_DIR="${OUTPUT_DIR:-./logs}" + +while [ "$1" != "" ]; +do + case $1 in + -l | --log_name ) + shift + LOG_NAME="$1" + echo "log name is $LOG_NAME" + ;; + -d | --dataset ) + shift + DATASET="$1" + echo "dataset is : $DATASET" + ;; + -n | --num_of_ins_per_socket ) + shift + NUMBER_OF_INSTANCE_PER_SOCKET="$1" + echo "number_of_instance_per_socket is : $NUMBER_OF_INSTANCE_PER_SOCKET" + ;; +# -c | --cores_per_instance ) +# shift +# cores_per_instance="$1" +# echo "cores_per_instance is : $cores_per_instance" +# ;; + -b | --batch_size ) + shift + BATCH_SIZE="$1" + echo "batch size per instance is : $BATCH_SIZE" + ;; + -s | --sequence_len ) + shift + SEQUENCE_LEN="$1" + echo "sequence_len is : $SEQUENCE_LEN" + ;; + --ipex_bf16 ) + IPEX_BF16=1 + echo "ipex_bf16 is : $IPEX_BF16" + ;; + --ipex_fp32 ) + IPEX_FP32=1 + echo "ipex_fp32 is : $IPEX_FP32" + ;; + --int8 ) + INT8=1 + echo "int8 is : $INT8" + ;; + --int8_bf16 ) + INT8_BF16=1 + echo "int8_bf16 is : $INT8_BF16" + ;; + -h | --help ) + echo "Usage: ./inference/multi_instance.sh [OPTIONS]" + echo "OPTION includes:" + echo " -l | --log_name - the log name of this round" + echo " -d | --dataset - [imdb|sst2] wether to use imdb or sst2 DATASET" + echo " -n | --num_of_ins_per_socket - number of instance per socket" +# echo " -c | --cores_per_instance - cores per instance" + echo " -b | --batch_size - batch size per instance" + echo " -s | --sequence_len - max sequence length" + echo " --ipex_bf16 - wether to use ipex_bf16 precision" + echo " --ipex_fp32 - wether to use ipex_fp32 precision" + echo " --int8 - wether to use int8 precision" + echo " --int8_bf16 - wether to use int8_bf16 precision" + echo " -h | --help - displays this message" + exit + ;; + * ) + echo "Invalid option: $1" + echo "Usage: inference/multi_instance.sh [OPTIONS]" + echo "OPTION includes:" + echo " -l | --log_name - the log name of this round" + echo " -d | --dataset - [imdb|sst2] wether to use imdb or sst2 DATASET" + echo " -n | --num_of_ins_per_socket - number of instance per socket" +# echo " -c | --cores_per_instance - cores per instance" + echo " -b | --batch_size - batch size per instance" + echo " -s | --sequence_len - max sequence length" + echo " --ipex_bf16 - wether to use ipex_bf16 precision" + echo " --ipex_fp32 - wether to use ipex_fp32 precision" + echo " --int8 - wether to use int8 precision" + echo " --int8_bf16 - wether to use int8_bf16 precision" + exit + ;; + esac + shift +done + +if [ -z "$LOG_NAME" ]; then + pre=`date "+%m%d-%H%M"` +else + pre=$LOG_NAME +fi + +if [ -z "$DATASET" ]; then + echo "Error: Please enter the DATASET ot use [imdb|sst2]" + exit +elif [ $DATASET != "imdb" -a $DATASET != "sst2" ]; then + echo "Error: The DATASET $DATASET cannot be recognized, please enter 'imdb' or 'sst2'" + exit +fi + +if [ -z "$NUMBER_OF_INSTANCE_PER_SOCKET" ]; then + echo "Error: Please set the instance number per socket using -n or --num_of_ins_per_socket" + exit +fi + +#if [ -z "$cores_per_instance" ]; then +# echo "Please set the core number per instance using -c or --cores_per_instance" +# exit +#fi + +if [ $IPEX_BF16 = 1 ]; then + if [ $INT8 = 1 -o $INT8_BF16 = 1 ]; then + 
echo "Error: Cannot set IPEX_BF16 and INT8 at the same time" + exit + fi +else + if [ $INT8 = 0 -a $INT8_BF16 = 1 ]; then + echo "Error: Cannot set INT8_BF16 without INT8 option" + exit + fi +fi + +if [ -z "$BATCH_SIZE" ]; then + echo "Error: Please set the batch size per instance using -b or --BATCH_SIZE" + exit +fi + +if [ -z $SEQUENCE_LEN ]; then + if [ $DATASET = 'imdb' ]; then + SEQUENCE_LEN=512 + elif [ $DATASET = 'sst2' ]; then + SEQUENCE_LEN=55 + fi + echo "WARNING: SEQUENCE_LEN is not set, using default DATASET ($DATASET) sequence length $SEQUENCE_LEN" +fi + + +all_core_number=`cat /proc/cpuinfo |grep "processor"|wc -l` +socket_number=`lscpu | grep "Socket(s)" | awk '{print $2}'` +core_number_per_socket=$(($all_core_number / $socket_number)) +instance_number=$(($NUMBER_OF_INSTANCE_PER_SOCKET * $socket_number)) + +if [ $(($core_number_per_socket % $NUMBER_OF_INSTANCE_PER_SOCKET)) != 0 ]; then + echo "\`instance_numberi_per_socket($NUMBER_OF_INSTANCE_PER_SOCKET)\` cannot be divisible by \`core_number_per_socket($core_number_per_socket)\`" + exit +else + cores_per_instance=$(($core_number_per_socket / $NUMBER_OF_INSTANCE_PER_SOCKET)) +fi + +if [ $DATASET = 'imdb' ]; then + max_test_samples=$((25000/$instance_number)) +else + max_test_samples=$((872/$instance_number)) +fi + +OUTPUT_DIR=$OUTPUT_DIR'/'$pre'/'$DATASET +echo "log directory is $OUTPUT_DIR" +mkdir -p $OUTPUT_DIR + + +for i in $(seq 1 $instance_number) +do + export OMP_NUM_THREADS=$cores_per_instance + start_index=$(( ($i-1) * $cores_per_instance)) + end_index=$(( ($i * $cores_per_instance) -1)) + mem_bind=$(( $start_index / $core_number_per_socket)) + echo "\`start core index\` is $start_index" + echo "\`end core index \` is $end_index" + echo "\`memory bind\` is $mem_bind" + str="numactl -C $start_index-$end_index -m $mem_bind" + echo $str + nohup numactl -C $start_index-$end_index -m $mem_bind python ./src/run_pt_native_inf.py \ + --model_name_or_path $MODEL_NAME_OR_PATH \ + --dataset $DATASET \ + --int8 $INT8 \ + --int8_bf16 $INT8_BF16 \ + --ipex_bf16 $IPEX_BF16 \ + --ipex_fp32 $IPEX_FP32 \ + --multi_instance \ + --output_dir $OUTPUT_DIR/output_test \ + --do_predict \ + --max_seq_len $SEQUENCE_LEN \ + --instance_index $i \ + --max_test_samples $max_test_samples \ + --per_device_eval_batch_size $BATCH_SIZE \ + > $OUTPUT_DIR/test_$i.log 2>&1 & +done diff --git a/profiling-transformers/inference/single_instance.sh b/profiling-transformers/inference/single_instance.sh new file mode 100755 index 0000000..ea01f64 --- /dev/null +++ b/profiling-transformers/inference/single_instance.sh @@ -0,0 +1,126 @@ +# Copyright (C) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. 
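The precision flags of the launcher above are mutually constrained: `--ipex_bf16` cannot be combined with the int8 options, and `--int8_bf16` is only valid together with `--int8`. The int8 paths additionally expect a quantized model or an int8 configuration file next to the model checkpoint, as handled in `src/run_pt_native_inf.py`. A few illustrative invocations:

```
./inference/multi_instance.sh -n 2 -d sst2 --ipex_fp32          # IPEX-optimized fp32
./inference/multi_instance.sh -n 2 -d sst2 --ipex_bf16          # IPEX bf16
./inference/multi_instance.sh -n 2 -d sst2 --int8               # static int8
./inference/multi_instance.sh -n 2 -d sst2 --int8 --int8_bf16   # int8 with bf16 mixed precision
```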
+# + +# + +export LOG_NAME=`date "+%m%d-%H%M"` +export DATASET="sst2" +export BATCH_SIZE=8 +export SEQUENCE_LEN=55 +export IPEX_BF16=0 +export IPEX_FP32=0 +export INT8=0 +export INT8_BF16=0 +export MODEL_NAME_OR_PATH="${MODEL_NAME_OR_PATH:-bert-large-uncased}" +export OUTPUT_DIR="${OUTPUT_DIR:-./logs}" + +while [ "$1" != "" ]; +do + case $1 in + -l | --log_name ) + shift + LOG_NAME="$1" + echo "log name is $LOG_NAME" + ;; + -d | --dataset ) + shift + DATASET="$1" + echo "dataset is : $DATASET" + ;; + -b | --batch_size ) + shift + BATCH_SIZE="$1" + echo "batch size per instance is : $BATCH_SIZE" + ;; + -s | --sequence_len ) + shift + SEQUENCE_LEN="$1" + echo "sequence_len is : $SEQUENCE_LEN" + ;; + --ipex_bf16 ) + IPEX_BF16=1 + echo "ipex_bf16 is : $IPEX_BF16" + ;; + --ipex_fp32 ) + IPEX_FP32=1 + echo "ipex_fp32 is : $IPEX_FP32" + ;; + --int8 ) + INT8=1 + echo "int8 is : $INT8" + ;; + --int8_bf16 ) + INT8_BF16=1 + echo "int8_bf16 is : $INT8_BF16" + ;; + -h | --help ) + echo "Usage: ././inference/single_instance.sh [OPTIONS]" + echo "OPTION includes:" + echo " -l | --log_name - the log name of this round" + echo " -d | --dataset - [imdb|sst2] wether to use imdb or sst2 DATASET" + echo " -b | --batch_size - batch size per instance" + echo " -s | --sequence_len - max sequence length" + echo " --ipex_bf16 - wether to use ipex_bf16 precision" + echo " --ipex_fp32 - wether to use ipex_fp32 precision" + echo " --int8 - wether to use int8 precision" + echo " --int8_bf16 - wether to use int8_bf16 precision" + echo " -h | --help - displays this message" + exit + ;; + * ) + echo "Invalid option: $1" + echo "Usage: ./inference/single_instance.sh [OPTIONS]" + echo "OPTION includes:" + echo " -l | --log_name - the log name of this round" + echo " -d | --dataset - [imdb|sst2] wether to use imdb or sst2 DATASET" + echo " -b | --batch_size - batch size per instance" + echo " -s | --sequence_len - max sequence length" + echo " --ipex_bf16 - wether to use ipex_bf16 precision" + echo " --ipex_fp32 - wether to use ipex_fp32 precision" + echo " --int8 - wether to use int8 precision" + echo " --int8_bf16 - wether to use int8_bf16 precision" + exit + ;; + esac + shift +done + +if [ -z "$LOG_NAME" ]; then + pre=`date "+%m%d-%H%M"` +else + pre=$LOG_NAME +fi + +OUTPUT_DIR=$OUTPUT_DIR'/'$pre'/'$DATASET +echo $OUTPUT_DIR + +mkdir -p $OUTPUT_DIR + + +export CUDA_VISIBLE_DEVICES="-1"; \ +python ./src/run_pt_native_inf.py \ + --model_name_or_path $MODEL_NAME_OR_PATH \ + --dataset $DATASET \ + --int8 $INT8 \ + --int8_bf16 $INT8_BF16 \ + --ipex_bf16 $IPEX_BF16 \ + --ipex_fp32 $IPEX_FP32 \ + --output_dir $OUTPUT_DIR/output_test \ + --do_predict \ + --max_seq_len $SEQUENCE_LEN \ + --per_device_eval_batch_size $BATCH_SIZE \ + 2>&1 | tee $OUTPUT_DIR/test_$i.log + + diff --git a/profiling-transformers/install.sh b/profiling-transformers/install.sh new file mode 100755 index 0000000..4bb6d1f --- /dev/null +++ b/profiling-transformers/install.sh @@ -0,0 +1,20 @@ +#!/usr/bin/bash +# Copyright (C) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# + +# +conda install -y pytorch==1.12.1 torchvision torchaudio cpuonly intel-openmp gperftools ninja setuptools tqdm future cmake numpy pyyaml scikit-learn pydot -c pytorch -c intel -c conda-forge +pip install transformers==4.21.1 datasets==2.3.2 intel_extension_for_pytorch +bash deploy/install_torch_ccl.sh \ No newline at end of file diff --git a/profiling-transformers/src/__init__.py b/profiling-transformers/src/__init__.py new file mode 100644 index 0000000..356ca7b --- /dev/null +++ b/profiling-transformers/src/__init__.py @@ -0,0 +1,16 @@ +# Copyright (C) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# + +# diff --git a/profiling-transformers/src/run_pt.py b/profiling-transformers/src/run_pt.py new file mode 100644 index 0000000..7e5010b --- /dev/null +++ b/profiling-transformers/src/run_pt.py @@ -0,0 +1,137 @@ +# Copyright (C) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# + +# + +import logging + +from datasets import load_dataset +from transformers import ( + logging as hf_logging, + HfArgumentParser, + AutoTokenizer, + AutoModelForSequenceClassification, + Trainer, + TrainingArguments, +) + +from utils import ( + Arguments, + Benchmark, + compute_metrics, + save_train_metrics, + save_test_metrics, + check_dataset +) + +hf_logging.set_verbosity_info() +logger = logging.getLogger(__name__) + + +def main(): + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. 
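+    # `Arguments` (defined in src/utils.py) holds the benchmark-specific options
+    # (dataset, max_seq_len, instance_index, profiler toggles, etc.), while
+    # `TrainingArguments` is the standard Hugging Face training configuration;
+    # both dataclasses are filled from the same command line.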
+ parser = HfArgumentParser((Arguments, TrainingArguments)) + args, training_args = parser.parse_args_into_dataclasses() + + max_train, max_test = args.max_train_samples, args.max_test_samples + if args.smoke_test: + training_args.max_steps = 3 + max_train, max_test = 10, 10 + + bench = Benchmark() + track = bench.track + with track('Total Run'): + ############################ Load Data #################################### + with track('Load Data'): + data = load_dataset(*check_dataset(args.dataset)) + train_all = data['train'] + test_split = 'validation' if args.dataset == 'sst2' else 'test' + len_train = len(train_all) + train_data = train_all.select(range(len_train - max_train, len_train)) if max_train else train_all + + # split the Test Data for multi-instance + if args.multi_instance: + start_index = (args.instance_index - 1) * args.max_test_samples + end_index = args.instance_index * args.max_test_samples + test_data = data[test_split].select(range(start_index, end_index)) + print("start_index is ", start_index) + print("end_index is ", end_index) + print("test length is ", len(test_data)) + else: + test_data = data[test_split].select(range(max_test)) if max_test else data[test_split] + + text_column = [c for c in test_data.column_names if type(test_data[c][0]) != int][0] + + ############################### Pre-process ############################### + with track('Pre-process'): + with track('----Init tokenizer'): + tokenizer = AutoTokenizer.from_pretrained( + args.tokenizer_name if args.tokenizer_name else args.model_name_or_path + ) + + max_seq_len = min(args.max_seq_len, tokenizer.model_max_length) + + with track('----Tokenize + Extract Features'): + def preprocess(examples): + return tokenizer( + examples[text_column], + padding='max_length', + truncation=True, + max_length=max_seq_len + ) + + kwargs = dict( + function=preprocess, + batched=True, + num_proc=args.preprocessing_num_workers, + remove_columns=[text_column] + (['idx'] if args.dataset == 'sst2' else []), + load_from_cache_file=not args.overwrite_cache) + + train_data = train_data.map(**kwargs) if training_args.do_train else None + test_data = test_data.map(**kwargs) if training_args.do_predict else None + + ###################### Load Model and Trainer ############################ + with track('Load Model'): + model = AutoModelForSequenceClassification.from_pretrained(args.model_name_or_path) + + trainer = Trainer( + model=model, # the instantiated HF model to be trained + args=training_args, # training arguments, defined above + train_dataset=train_data, # training dataset + compute_metrics=compute_metrics, # evaluation metrics + tokenizer=tokenizer + ) + + ############################### Fine-Tune ################################# + if training_args.do_train: + with track('Fine-Tune'): + train_result = trainer.train() + trainer.save_model() + save_train_metrics(train_result, trainer, len(train_data)) + + ############################### Inference ################################# + test_metrics = "" + if training_args.do_predict: + with track('Inference'): + preds, _, metrics = trainer.predict(test_data) + test_metrics = save_test_metrics(metrics, len(test_data), training_args.output_dir) + + bench.summary() + print(test_metrics) + + +if __name__ == "__main__": + main() diff --git a/profiling-transformers/src/run_pt_native.py b/profiling-transformers/src/run_pt_native.py new file mode 100644 index 0000000..108abaf --- /dev/null +++ b/profiling-transformers/src/run_pt_native.py @@ -0,0 +1,249 @@ +# Copyright (C) 2022 
Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# + +# + +from pathlib import Path +import os +import logging +from tqdm import tqdm + +import numpy as np +import torch +from torch.utils.data import DataLoader +from torch import tensor + +try: + import intel_extension_for_pytorch as ipex +finally: + pass + +import transformers +from transformers import ( + HfArgumentParser, + AutoTokenizer, + AutoModelForSequenceClassification, + TrainingArguments, +) + +from utils import ( + Arguments, + read_dataset, + to_tensor_dataset, + Benchmark, + compute_metrics, + PredsLabels +) + +transformers.logging.set_verbosity_info() + +logger = logging.getLogger(__name__) + + +def main(): + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + + parser = HfArgumentParser((Arguments, TrainingArguments)) + args, training_args = parser.parse_args_into_dataclasses() + output_dir = Path(training_args.output_dir) + os.makedirs(output_dir, exist_ok=True) + bench = Benchmark() + track = bench.track + + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + + def to_inputs(batch: dict) -> dict: + return {k: (v if torch.is_tensor(v) else tensor(v)).to(device=device) \ + for k, v in batch.items()} + + ################################# Load Data ################################# + + with track('Load Data'): + if training_args.do_train: + # Train Data + train_texts, train_labels = read_dataset(args.dataset, 'train') + max_train = args.max_train_samples if args.max_train_samples else len(train_texts) + if args.smoke_test: + training_args.max_steps = 3 + training_args.num_train_epochs = 1 + max_train = 104 + train_texts, train_labels = train_texts[:max_train], train_labels[:max_train] + + if training_args.do_predict: + max_test = 100 if args.smoke_test else (args.max_test_samples if args.max_test_samples else None) + + if not args.real_time: + # Test Data + test_texts, test_labels = read_dataset(args.dataset, 'test') + if args.multi_instance: + start_index = (args.instance_index - 1) * args.max_test_samples + end_index = args.instance_index * args.max_test_samples + test_texts, test_labels = test_texts[start_index:end_index], test_labels[start_index:end_index] + print("start_index is ", start_index) + print("end_index is ", end_index) + print("test text length is ", len(test_texts)) + print("test labels length is ", len(test_labels)) + else: + test_texts, test_labels = test_texts[:max_test], test_labels[:max_test] + + ################################# Pre-process ################################# + with track('Pre-process'): + with track('----Init tokenizer'): + # Tokenization + Feature Extraction + tokenizer = AutoTokenizer.from_pretrained( + args.tokenizer_name if args.tokenizer_name else args.model_name_or_path + ) + max_seq_len = min(args.max_seq_len, tokenizer.model_max_length) + token_args = dict(truncation=True, padding=True, max_length=max_seq_len) + + if training_args.do_train: + with track('----Training data 
encoding'): + train_encodings = tokenizer(train_texts, **token_args) + with track('----Training tensor data convert'): + train_dataset = to_tensor_dataset('pt', train_encodings, train_labels) + + if training_args.do_predict and not args.real_time: + with track('----PyTorch test data encoding'): + test_encodings = tokenizer(test_texts, padding='max_length', max_length=max_seq_len, + truncation=True) + with track('----PyTorch test tensor data convert'): + test_dataset = to_tensor_dataset('pt', test_encodings, test_labels) + + ################################# Load Model ################################# + if training_args.do_train or not args.torchscript: + with track('Load Model'): + if args.bf16_ipex_ft: + with torch.cpu.amp.autocast(): + model = AutoModelForSequenceClassification \ + .from_pretrained(args.model_name_or_path) \ + .to(device=device) + model = ipex.optimize(model, dtype=torch.bfloat16, level='O0') + else: + model = AutoModelForSequenceClassification \ + .from_pretrained(args.model_name_or_path) \ + .to(device=device) + + if args.fp32_ipex_ft: + model = ipex.optimize(model, dtype=torch.float32, level='O1') + + with track("Process int8 model"): + if args.int8: + # convert fp32 model to int8 + ipex.nn.utils._model_convert.replace_dropout_with_identity(model) + conf = ipex.quantization.QuantConf(configure_file=args.model_name_or_path + "/configure.json") + dumpy_tensor = torch.ones((training_args.per_device_eval_batch_size, max_seq_len), dtype=torch.long) + jit_inputs = (dumpy_tensor, dumpy_tensor, dumpy_tensor) + if args.int8_bf16: + with torch.cpu.amp.autocast(): + model = ipex.quantization.convert(model, conf, jit_inputs) + else: + model = ipex.quantization.convert(model, conf, jit_inputs) + with torch.no_grad(): + y = model(dumpy_tensor, dumpy_tensor, dumpy_tensor) + y = model(dumpy_tensor, dumpy_tensor, dumpy_tensor) + + with track("Process bf16 model"): + if args.ipex_bf16: + # convert fp32 model to bf16 + with torch.cpu.amp.autocast(), torch.no_grad(): + torch.jit.load('imdb_bf16model.pt') + model = ipex.optimize(model, dtype=torch.bfloat16, level='O0') + dumpy_tensor = torch.ones((training_args.per_device_eval_batch_size, max_seq_len), dtype=torch.long) + jit_inputs = (dumpy_tensor, dumpy_tensor, dumpy_tensor) + with torch.cpu.amp.autocast(), torch.no_grad(): + model = torch.jit.trace(model, jit_inputs, strict=False) + model = torch.jit.freeze(model) + with torch.no_grad(): + y = model(dumpy_tensor, dumpy_tensor, dumpy_tensor) + y = model(dumpy_tensor, dumpy_tensor, dumpy_tensor) + + ################################ Fine-Tune ################################# + if training_args.do_train: + with track('Fine-Tune'): + with track('--------Init Fine-Tuning'): + batch_size = training_args.per_device_train_batch_size + model.train() + weight_decay = 0.0 + no_decay = ["bias", "LayerNorm.weight"] + optimizer_grouped_parameters = [ + { + "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], + "weight_decay": weight_decay, + }, + { + "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], + "weight_decay": 0.0, + }, + ] + optim = torch.optim.AdamW(optimizer_grouped_parameters, lr=training_args.learning_rate) + + with track('--------Training Loop'): + for _ in tqdm(range(int(training_args.num_train_epochs)), desc='Epoch'): + for batch in tqdm(DataLoader(train_dataset, batch_size=batch_size, shuffle=True), + desc='Train Step'): + optim.zero_grad() + loss = model(**to_inputs(batch))[0] + loss.backward() + 
optim.step() + + with track('--------Save Fine-Tuned Model'): + if args.torchscript: + with track('--------Save TorchScript model'): + model.eval() + batch = to_inputs(batch) + traced_model = torch.jit.trace(model, [batch['input_ids'], batch['attention_mask']]) + torch.jit.save(traced_model, output_dir / "traced_model.pt") + else: + torch.save(model.state_dict(), output_dir / "pytorch_model.bin") + + ############################### Inference ################################# + if training_args.do_predict: + with track('Inference'): + if args.torchscript: + with track('--------Load TorchScript model'): + model_path = output_dir if training_args.do_train else Path(args.model_name_or_path) + model = torch.jit.load(model_path / "traced_model.pt").to(device=device) + + batch_size = training_args.per_device_eval_batch_size + all_outputs, all_labels = [], [] + + def prediction_step(batch, labels): + all_labels.extend(labels) + inputs = to_inputs(batch) + output = model(inputs['input_ids'], inputs['attention_mask']) if args.torchscript \ + else model(**inputs) + all_outputs.append(output['logits'].detach().cpu()) + + model.eval() + with torch.no_grad(): + if args.real_time: + data_generator = read_dataset(args.dataset, 'test', generator=True, \ + batch_size=batch_size, max_samples=max_test) + + for texts, labels in tqdm(data_generator, desc='Test Step'): + prediction_step(batch=tokenizer(texts, **token_args), labels=labels) + + else: + for batch in tqdm(DataLoader(test_dataset, batch_size=batch_size), desc='Test Step'): + prediction_step(batch=batch, labels=batch.pop('labels')) + acc = compute_metrics(PredsLabels(preds=np.concatenate(all_outputs), labels=all_labels)) + print(f"\n*********** TEST_METRICS ***********\nAccuracy: {acc['acc']}\n") + + bench.summary() + + +if __name__ == "__main__": + main() diff --git a/profiling-transformers/src/run_pt_native_ft.py b/profiling-transformers/src/run_pt_native_ft.py new file mode 100644 index 0000000..1040ced --- /dev/null +++ b/profiling-transformers/src/run_pt_native_ft.py @@ -0,0 +1,286 @@ +# Copyright (C) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# + +# + +from pathlib import Path +import os +import logging +from tqdm import tqdm + +import numpy as np +import torch +from torch.utils.data import DataLoader, RandomSampler +from torch.utils.data.distributed import DistributedSampler +from torch import tensor + +try: + import intel_extension_for_pytorch as ipex +finally: + pass + +import transformers +from transformers import ( + HfArgumentParser, + AutoTokenizer, + AutoModelForSequenceClassification, + TrainingArguments, + set_seed, +) + +from utils import ( + Arguments, + read_dataset, + to_tensor_dataset, + Benchmark, + compute_metrics, + PredsLabels +) + +transformers.logging.set_verbosity_info() + +logger = logging.getLogger(__name__) + + +def main(): + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. 
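+    # Besides argument parsing, this variant seeds the run and, when launched under
+    # an MPI/PMI environment with more than one rank, initializes torch.distributed
+    # with the requested backend (ccl or mpi) before fine-tuning.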
+ + parser = HfArgumentParser((Arguments, TrainingArguments)) + args, training_args = parser.parse_args_into_dataclasses() + output_dir = Path(training_args.output_dir) + os.makedirs(output_dir, exist_ok=True) + bench = Benchmark() + track = bench.track + + set_seed(training_args.seed) + + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + + if int(os.environ.get('PMI_SIZE', '0')) > 1 and not args.multi_instance: + if args.dist_backend == 'ccl': + try: + import oneccl_bindings_for_pytorch + except: + print("CCL backend requested but import oneccl_bindings_for_pytorch failed") + raise + elif args.dist_backend == 'mpi': + if not torch.distributed.is_mpi_available(): + try: + import torch_mpi + except: + print("MPI backend requested but not available try installing torch_mpi module") + raise + else: + raise ValueError(f"{args.dist_backend} backend requested but not supported") + + os.environ['RANK'] = os.environ.get('PMI_RANK', '0') + os.environ['WORLD_SIZE'] = os.environ.get('PMI_SIZE', '1') + torch.distributed.init_process_group(backend=args.dist_backend) + device = torch.device("cpu") + training_args.local_rank = torch.distributed.get_rank() + if training_args.local_rank == 0: print(f"##################Using {args.dist_backend.upper()} dist run with {torch.distributed.get_world_size()} ranks", flush=True) + + def to_inputs(batch: dict) -> dict: + return {k: (v if torch.is_tensor(v) else tensor(v)).to(device=device) \ + for k, v in batch.items()} + + ################################# Load Data ################################# + + with track('Load Data'): + if training_args.do_train: + # Train Data + train_texts, train_labels = read_dataset(args.dataset, 'train') + max_train = args.max_train_samples if args.max_train_samples else len(train_texts) + if args.smoke_test: + training_args.max_steps = 3 + training_args.num_train_epochs = 1 + max_train = 104 + train_texts, train_labels = train_texts[:max_train], train_labels[:max_train] + + if training_args.do_predict: + max_test = 100 if args.smoke_test else (args.max_test_samples if args.max_test_samples else None) + + if not args.real_time: + # Test Data + test_texts, test_labels = read_dataset(args.dataset, 'test') + if args.multi_instance: + start_index = (args.instance_index - 1) * args.max_test_samples + end_index = args.instance_index * args.max_test_samples + test_texts, test_labels = test_texts[start_index:end_index], test_labels[start_index:end_index] + print("start_index is ", start_index) + print("end_index is ", end_index) + print("test text length is ", len(test_texts)) + print("test labels length is ", len(test_labels)) + else: + test_texts, test_labels = test_texts[:max_test], test_labels[:max_test] + + ################################# Pre-process ################################# + with track('Pre-process'): + with track('----Init tokenizer'): + # Tokenization + Feature Extraction + tokenizer = AutoTokenizer.from_pretrained( + args.tokenizer_name if args.tokenizer_name else args.model_name_or_path + ) + max_seq_len = min(args.max_seq_len, tokenizer.model_max_length) + token_args = dict(truncation=True, padding=True, max_length=max_seq_len) + + if training_args.do_train: + with track('----Training data encoding'): + train_encodings = tokenizer(train_texts, **token_args) + with track('----Training tensor data convert'): + train_dataset = to_tensor_dataset('pt', train_encodings, train_labels) + + if training_args.do_predict and not args.real_time: + with track('----PyTorch test data encoding'): + test_encodings 
= tokenizer(test_texts, padding='max_length', max_length=max_seq_len, + truncation=True) + with track('----PyTorch test tensor data convert'): + test_dataset = to_tensor_dataset('pt', test_encodings, test_labels) + + ################################# Load Model ################################# + if training_args.do_train or not args.torchscript: + with track('Load Model'): + if args.bf16_ipex_ft: + with torch.cpu.amp.autocast(): + model = AutoModelForSequenceClassification \ + .from_pretrained(args.model_name_or_path) \ + .to(device=device) + model = ipex.optimize(model, dtype=torch.bfloat16, level='O0') + else: + model = AutoModelForSequenceClassification \ + .from_pretrained(args.model_name_or_path) \ + .to(device=device) + #model = AutoModelForSequenceClassification \ + # .from_pretrained(args.model_name_or_path) \ + # .to(device=device) + + with track("Process int8 model"): + if args.int8: + # convert fp32 model to int8 + ipex.nn.utils._model_convert.replace_dropout_with_identity(model) + conf = ipex.quantization.QuantConf(configure_file=args.model_name_or_path + "/configure.json") + dumpy_tensor = torch.ones((training_args.per_device_eval_batch_size, max_seq_len), dtype=torch.long) + jit_inputs = (dumpy_tensor, dumpy_tensor, dumpy_tensor) + if args.int8_bf16: + with torch.cpu.amp.autocast(): + model = ipex.quantization.convert(model, conf, jit_inputs) + else: + model = ipex.quantization.convert(model, conf, jit_inputs) + with torch.no_grad(): + y = model(dumpy_tensor, dumpy_tensor, dumpy_tensor) + y = model(dumpy_tensor, dumpy_tensor, dumpy_tensor) + + with track("Process bf16 model"): + if args.ipex_bf16: + # convert fp32 model to bf16 + with torch.cpu.amp.autocast(), torch.no_grad(): + torch.jit.load('imdb_bf16model.pt') + model = ipex.optimize(model, dtype=torch.bfloat16, level='O0') + dumpy_tensor = torch.ones((training_args.per_device_eval_batch_size, max_seq_len), dtype=torch.long) + jit_inputs = (dumpy_tensor, dumpy_tensor, dumpy_tensor) + with torch.cpu.amp.autocast(), torch.no_grad(): + model = torch.jit.trace(model, jit_inputs, strict=False) + model = torch.jit.freeze(model) + with torch.no_grad(): + y = model(dumpy_tensor, dumpy_tensor, dumpy_tensor) + y = model(dumpy_tensor, dumpy_tensor, dumpy_tensor) + + ################################ Fine-Tune ################################# + if training_args.do_train: + with track('Fine-Tune'): + with track('--------Init Fine-Tuning'): + batch_size = training_args.per_device_train_batch_size + model.train() + weight_decay = 0.0 + no_decay = ["bias", "LayerNorm.weight"] + optimizer_grouped_parameters = [ + { + "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], + "weight_decay": weight_decay, + }, + { + "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], + "weight_decay": 0.0, + }, + ] + optim = torch.optim.AdamW(optimizer_grouped_parameters, lr=training_args.learning_rate) + if training_args.local_rank != -1: + model = torch.nn.parallel.DistributedDataParallel(model) + + with track('--------Training Loop'): + train_sampler = RandomSampler(train_dataset) if training_args.local_rank == -1 else DistributedSampler(train_dataset) + + for _ in tqdm(range(int(training_args.num_train_epochs)), desc='Epoch', disable=training_args.local_rank not in [-1, 0]): + for batch in tqdm(DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size), + desc='Train Step', disable=training_args.local_rank not in [-1, 0]): + optim.zero_grad() + loss = 
model(**to_inputs(batch))[0] + loss.backward() + optim.step() + + with track('--------Save Fine-Tuned Model'): + if training_args.local_rank in [-1, 0]: + # Take care of DDP wrapper + model_to_save = model.module if hasattr(model, "module") else model + if args.torchscript: + with track('--------Save TorchScript model'): + model.eval() + batch = to_inputs(batch) + traced_model = torch.jit.trace(model_to_save, [batch['input_ids'], batch['attention_mask']]) + torch.jit.save(traced_model, output_dir / "traced_model.pt") + else: + torch.save(model_to_save.state_dict(), output_dir / "pytorch_model.bin") + + ############################### Inference ################################# + if training_args.do_predict: + with track('Inference'): + if args.torchscript: + with track('--------Load TorchScript model'): + model_path = output_dir if training_args.do_train else Path(args.model_name_or_path) + model = torch.jit.load(model_path / "traced_model.pt").to(device=device) + + batch_size = training_args.per_device_eval_batch_size + all_outputs, all_labels = [], [] + + def prediction_step(batch, labels): + all_labels.extend(labels) + inputs = to_inputs(batch) + output = model(inputs['input_ids'], inputs['attention_mask']) if args.torchscript \ + else model(**inputs) + all_outputs.append(output['logits'].detach().cpu()) + + model.eval() + with torch.no_grad(): + if args.real_time: + data_generator = read_dataset(args.dataset, 'test', generator=True, \ + batch_size=batch_size, max_samples=max_test) + + for texts, labels in tqdm(data_generator, desc='Test Step'): + prediction_step(batch=tokenizer(texts, **token_args), labels=labels) + + else: + test_sampler = RandomSampler(test_dataset) if training_args.local_rank == -1 else DistributedSampler(test_dataset) + + for batch in tqdm(DataLoader(test_dataset, sampler=test_sampler, batch_size=batch_size), desc='Test Step'): + prediction_step(batch=batch, labels=batch.pop('labels')) + acc = compute_metrics(PredsLabels(preds=np.concatenate(all_outputs), labels=all_labels)) + print(f"\n*********** TEST_METRICS ***********\nAccuracy: {acc['acc']}\n") + + bench.summary() + + +if __name__ == "__main__": + main() diff --git a/profiling-transformers/src/run_pt_native_inf.py b/profiling-transformers/src/run_pt_native_inf.py new file mode 100644 index 0000000..935906f --- /dev/null +++ b/profiling-transformers/src/run_pt_native_inf.py @@ -0,0 +1,223 @@ +# Copyright (C) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. 
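The fine-tuning variant above runs data-parallel across ranks when it detects a PMI environment (`PMI_RANK`/`PMI_SIZE`). A hypothetical two-rank launch with the oneCCL backend, assuming an Intel MPI style launcher that exports those variables and that the SST-2 tsv files are present under `./datasets/sst` as expected by `read_dataset` in `src/utils.py`, could look like:

```
# Hypothetical 2-rank CPU fine-tuning run with the oneCCL backend; exact
# launcher flags depend on the local MPI installation.
mpirun -n 2 python ./src/run_pt_native_ft.py \
    --model_name_or_path bert-large-uncased \
    --dataset sst2 \
    --dist_backend ccl \
    --do_train \
    --per_device_train_batch_size 32 \
    --output_dir ./logs/ddp_sst2
```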
+# + +# + +import logging +import os + +import numpy as np +import torch +from datasets import load_dataset +from torch import tensor +from torch.utils.data import DataLoader +from tqdm import tqdm + +try: + import intel_extension_for_pytorch as ipex +finally: + pass + +from transformers import ( + logging as hf_logging, + HfArgumentParser, + AutoTokenizer, + AutoModelForSequenceClassification, + TrainingArguments, + DataCollatorWithPadding +) + +from utils import ( + Arguments, + Benchmark, + compute_metrics, + PredsLabels, + check_dataset +) + +hf_logging.set_verbosity_info() +logger = logging.getLogger(__name__) + + +def main(): + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + parser = HfArgumentParser((Arguments, TrainingArguments)) + args, training_args = parser.parse_args_into_dataclasses() + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + + max_train, max_test = args.max_train_samples, args.max_test_samples + if args.smoke_test: + training_args.max_steps = 3 + max_train, max_test = 10, 10 + + bench = Benchmark() + track = bench.track + with track('Total Run'): + ############################ Load Data #################################### + with track('Load Data'): + data = load_dataset(*check_dataset(args.dataset)) + train_all = data['train'] + test_split = 'validation' if args.dataset == 'sst2' else 'test' + len_train = len(train_all) + train_data = train_all.select(range(len_train - max_train, len_train)) if max_train else train_all + + # split the Test Data for multi-instance + if args.multi_instance: + start_index = (args.instance_index - 1) * args.max_test_samples + end_index = args.instance_index * args.max_test_samples + test_data = data[test_split].select(range(start_index, end_index)) + print("start_index is ", start_index) + print("end_index is ", end_index) + print("test length is ", len(test_data)) + else: + test_data = data[test_split].select(range(max_test)) if max_test else data[test_split] + + text_column = [c for c in test_data.column_names if type(test_data[c][0]) != int][0] + + ############################### Pre-process ############################### + with track('Pre-process'): + with track('----Init tokenizer'): + tokenizer = AutoTokenizer.from_pretrained( + args.tokenizer_name if args.tokenizer_name else args.model_name_or_path + ) + + max_seq_len = min(args.max_seq_len, tokenizer.model_max_length) + + with track('----Tokenize + Extract Features'): + def preprocess(examples): + return tokenizer( + examples[text_column], + padding='max_length', + truncation=True, + max_length=max_seq_len + ) + + kwargs = dict( + function=preprocess, + batched=True, + num_proc=args.preprocessing_num_workers, + remove_columns=[text_column] + (['idx'] if args.dataset == 'sst2' else []), + load_from_cache_file=not args.overwrite_cache) + + train_data = train_data.map(**kwargs) if training_args.do_train else None + test_data = test_data.map(**kwargs) if training_args.do_predict else None + + ###################### Load Model and Trainer ############################ + with track('Load Model'): + model = AutoModelForSequenceClassification.from_pretrained(args.model_name_or_path).to(device=device) + + with track("Process int8 model"): + if args.int8: + # convert fp32 model to int 8 + dumpy_tensor = torch.ones((training_args.per_device_eval_batch_size, max_seq_len), dtype=torch.long) + jit_inputs = (dumpy_tensor, dumpy_tensor, dumpy_tensor) + + if os.path.exists(args.model_name_or_path + 
"/quantized_model.pt"): + print("load int8 model-----------------------") + if args.int8_bf16: + with torch.cpu.amp.autocast(): + model = torch.jit.load(args.model_name_or_path + "/quantized_model.pt") + model = torch.jit.freeze(model.eval()) + else: + model = torch.jit.load(args.model_name_or_path + "/quantized_model.pt") + model = torch.jit.freeze(model.eval()) + else: + print("load configure and convert the model") + ipex.nn.utils._model_convert.replace_dropout_with_identity(model) + from intel_extension_for_pytorch.quantization import prepare, convert + from torch.ao.quantization import MinMaxObserver, PerChannelMinMaxObserver, QConfig + qconfig = QConfig(activation=MinMaxObserver.with_args(qscheme=torch.per_tensor_affine, dtype=torch.quint8), weight=PerChannelMinMaxObserver.with_args(dtype=torch.qint8, qscheme=torch.per_channel_symmetric)) + prepared_model = prepare(model, qconfig, example_inputs=jit_inputs, inplace=False) + prepared_model.load_qconf_summary(qconf_summary = args.model_name_or_path + "/int8_configure.json") + if args.int8_bf16: + with torch.cpu.amp.autocast(): + model = convert(prepared_model) + model = torch.jit.trace(model, jit_inputs, strict=False) + else: + model = convert(prepared_model) + model = torch.jit.trace(model, jit_inputs, strict=False) + model = torch.jit.freeze(model) + + + with torch.no_grad(): + y = model(dumpy_tensor, dumpy_tensor, dumpy_tensor) + y = model(dumpy_tensor, dumpy_tensor, dumpy_tensor) + + # model.save("quantized_model.pt") + # import sys + # sys.exit(0) + + with track("Process bf16 model"): + if args.ipex_bf16: + model = ipex.optimize(model, dtype=torch.bfloat16, level='O0') + dumpy_tensor = torch.ones((training_args.per_device_eval_batch_size, max_seq_len), dtype=torch.long) + jit_inputs = (dumpy_tensor, dumpy_tensor, dumpy_tensor) + with torch.cpu.amp.autocast(), torch.no_grad(): + model = torch.jit.trace(model, jit_inputs, strict=False) + model = torch.jit.freeze(model) + with torch.no_grad(): + y = model(dumpy_tensor, dumpy_tensor, dumpy_tensor) + y = model(dumpy_tensor, dumpy_tensor, dumpy_tensor) + + if args.ipex_fp32: + model = ipex.optimize(model, dtype=torch.float32, level='O1') + + ############################### Inference ################################# + if training_args.do_predict: + with track('Inference'): + batch_size = training_args.per_device_eval_batch_size + all_outputs, all_labels = [], [] + + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + + def to_inputs(batch: dict) -> dict: + return {k: (v if torch.is_tensor(v) else tensor(v)).to(device=device) for k, v in batch.items()} + + def prediction_step(batch, labels): + all_labels.extend(labels) + inputs = to_inputs(batch) + output = model(inputs['input_ids'], inputs['attention_mask']) if args.torchscript \ + else model(**inputs) + all_outputs.append(output['logits'].detach().cpu()) + + model.eval() + + with torch.no_grad(): + if args.profiler: + with torch.profiler.profile( + schedule=torch.profiler.schedule(wait=1, warmup=1, active=3, repeat=2), + on_trace_ready=torch.profiler.tensorboard_trace_handler('./profiler/' + args.profiler_name), + record_shapes=True, + profile_memory=True, + with_stack=True + ) as prof: + for batch in tqdm(DataLoader(test_data, batch_size=batch_size, + collate_fn=DataCollatorWithPadding(tokenizer))): + prediction_step(batch=batch, labels=batch.pop('labels')) + prof.step() + else: + for batch in tqdm(DataLoader(test_data, batch_size=batch_size, + collate_fn=DataCollatorWithPadding(tokenizer))): + 
prediction_step(batch=batch, labels=batch.pop('labels')) + + acc = compute_metrics(PredsLabels(preds=np.concatenate(all_outputs), labels=all_labels)) + print(f"\n*********** TEST_METRICS ***********\nAccuracy: {acc['acc']}\n") + + bench.summary() + + +if __name__ == "__main__": + main() diff --git a/profiling-transformers/src/utils.py b/profiling-transformers/src/utils.py new file mode 100644 index 0000000..85ff24c --- /dev/null +++ b/profiling-transformers/src/utils.py @@ -0,0 +1,343 @@ +# Copyright (C) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +# + +# + +import json +from dataclasses import dataclass, field +from typing import Optional +from pathlib import Path +import numpy as np +from time import perf_counter_ns +from dataclasses import dataclass, field +import numpy as np +from contextlib import contextmanager +import os + +SEC_TO_NS_SCALE = 1000000000 + +SPLIT_PATHS = { + ('imdb', 'train'): './datasets/aclImdb/train', + ('imdb', 'test'): './datasets/aclImdb/test', + ('sst2', 'train'): './datasets/sst/train.tsv', + ('sst2', 'test'): './datasets/sst/dev.tsv' +} + + +@dataclass +class Benchmark: + summary_msg: str = field(default_factory=str) + + @property + def num_runs(self) -> int: + return len(self.latencies) + + @contextmanager + def track(self, step): + start = perf_counter_ns() + yield + ns = perf_counter_ns() - start + msg = f"\n{'*' * 70}\n'{step}' took {ns / SEC_TO_NS_SCALE:.3f}s ({ns:,}ns)\n{'*' * 70}\n" + print(msg) + self.summary_msg += msg + '\n' + + def summary(self): + print(f"\n{'#' * 30}\nBenchmark Summary:\n{'#' * 30}\n\n{self.summary_msg}") + + +@dataclass +class Arguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + model_name_or_path: str = field( + default="bert-base-uncased", + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + smoke_test: Optional[bool] = field( + default=False, + metadata={"help": "Whether to execute in sanity check mode."} + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of testing examples to this " + "value if set." + }, + ) + instance_index: Optional[int] = field( + default=None, + metadata={ + "help": "for multi-instance inference, to indicate which instance this is." + }, + ) + dataset: Optional[str] = field( + default='imdb', + metadata={ + "help": "Select dataset ('imdb' / 'sst2'). Default is 'imdb'" + }, + ) + max_seq_len: int = field( + default=512, + metadata={ + "help": "The maximum total input sequence length after tokenization. 
Sequences longer " + "than this will be truncated, sequences shorter will be padded." + }, + ) + profiler: int = field( + default=0, + metadata={ + "help": "wether using pytorch profiler" + }, + ) + profiler_name: str = field( + default="test", + metadata={ + "help": "log name for pytorch profiler" + }, + ) + ipex: bool = field( + default=False, + metadata={ + "help": "Use Intel® Extension for PyTorch for fine-Tuning." + }, + ) + ipex_bf16: int = field( + default=0, + metadata={ + "help": "Auto mixed precision using bfloat16." + }, + ) + ipex_fp32: int = field( + default=0, + metadata={ + "help": "Auto mixed precision using bfloat16." + }, + ) + bf16_ipex_ft: int = field( + default=False, + metadata={ + "help": "Auto mixed precision using bfloat16 to fine-tuning." + }, + ) + fp32_ipex_ft: int = field( + default=False, + metadata={ + "help": "use ipex optimization for fp32 fine-tuning." + }, + ) + int8_bf16: int = field( + default=0, + metadata={ + "help": "Auto mixed precision using int8+bfloat16." + }, + ) + multi_instance: bool = field( + default=False, + metadata={ + "help": "Whether to use multi-instance mode" + }, + ) + int8: int = field( + default=0, + metadata={ + "help": "Whether to do inference with int8 model" + }, + ) + dist_backend: Optional[str] = field( + default="ccl", metadata={"help": "Distributed backend to use"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + overwrite_cache: bool = field( + default=True, metadata={"help": "Overwrite the cached training and evaluation sets."} + ) + real_time: bool = field( + default=False, metadata={"help": "Whether to pre-process the inputs in real-time."} + ) + few_shot: bool = field( + default=False, + metadata={ + "help": "Employ few-shot pattern-based MLM training on a small subset of the data." + }, + ) + pattern_id: bool = field( + default=0, metadata={"help": "Few-shot: pattern id of the pattern to use for few-shot training."} + ) + label_loss: bool = field( + default=True, metadata={"help": "Few-shot: whether to use label loss."} + ) + random_mlm: bool = field( + default=False, metadata={"help": "Few-shot: whether to use random MLM loss."} + ) + alpha: float = field( + default=0.6, metadata={"help": "Few-shot: alpha value for loss computation: ."} + ) + torchscript: bool = field( + default=False, metadata={"help": "Enable Torchscript."} + ) + + +class PredsLabels: + def __init__(self, preds, labels): + self.predictions = preds + self.label_ids = labels + + +def compute_metrics(p): + preds = np.argmax(p.predictions, axis=1) + return {"acc": (preds == p.label_ids).mean()} + + +def check_dataset(name: str): + if name == 'imdb': + return [name] + elif name == 'sst2': + return ['glue', 'sst2'] + else: + error_msg = f'Now only imdb and sst2 dataset are supported. Your dataset is {name}.' 
+ raise ValueError(error_msg) + + +def read_dataset(name: str, split: str = "test", generator: bool = False, + return_labels: bool = True, batch_size: int = 1, max_samples: int = None): + split_path = SPLIT_PATHS[(name, split)] + args = split_path, return_labels, batch_size, max_samples + gen = imdb_gen(*args) if name == 'imdb' else sst_gen(*args) + + if generator: + return gen + + texts, labels = [], [] + for text_batch, label_batch in gen: + texts.extend(text_batch) + if return_labels: + labels.extend(label_batch) + return (texts, labels) if return_labels else texts + + +def imdb_gen(split_path, return_label, batch_size, max_samples): + text_batch, label_batch = [], [] + for label_dir in "pos", "neg": + for i, text_file in enumerate((Path(split_path) / label_dir).iterdir()): + text_batch.append(text_file.read_text()) + if return_label: + label_batch.append(0 if label_dir == 'neg' else 1) + if len(text_batch) == batch_size: + yield (text_batch, label_batch) if return_label else text_batch + text_batch, label_batch = [], [] + if max_samples is not None and i == max_samples / 2: + break + if text_batch: + yield (text_batch, label_batch) if return_label else text_batch + text_batch, label_batch = [], [] + + +def sst_gen(split_path, return_label, batch_size, max_samples): + text_batch, label_batch = [], [] + i = 0 + with open(split_path) as f: + for line in f.readlines()[1:]: + if line: + i += 1 + text, label = line.strip().split(" \t") + text_batch.append(text) + if return_label: + label_batch.append(int(label)) + if len(text_batch) == batch_size: + yield (text_batch, label_batch) if return_label else text_batch + text_batch, label_batch = [], [] + if max_samples is not None and i == max_samples: + break + if text_batch: + yield (text_batch, label_batch) if return_label else text_batch + text_batch, label_batch = [], [] + + +def to_tensor_dataset(framework, encodings, labels=None): + if framework == 'tf': + from tensorflow.data import Dataset + + data = (dict(encodings), labels) if labels else dict(encodings) + dataset = Dataset.from_tensor_slices(data) + + if framework == 'pt': + from torch import tensor + from torch.utils.data import Dataset + + class IMDbDataset(Dataset): + def __init__(self, encodings, labels): + self.encodings = encodings + self.labels = labels + + def __getitem__(self, idx): + item = {key: tensor(val[idx]) for key, val in self.encodings.items()} + item['labels'] = tensor(self.labels[idx]) + return item + + def __len__(self): + return len(self.labels) + + dataset = IMDbDataset(encodings, labels) + + return dataset + + +def save_train_metrics(train_result, trainer, max_train): + # pytorch only + if train_result: + metrics = train_result.metrics + metrics["train_samples"] = max_train + trainer.save_metrics("train", metrics) + trainer.save_state() + + +def save_test_metrics(metrics, max_test, output_dir): + metrics['test_samples'] = max_test + with open(Path(output_dir) / 'test_results.json', 'w') as f: + json.dump(metrics, f, indent=2) + return "\n\n******** TEST METRICS ********\n" + '\n'.join(f'{k}: {v}' for k, v in metrics.items()) + + +def read_imdb_split(split_dir): + texts, labels = [], [] + for label_dir in "pos", "neg": + for text_file in (Path(split_dir) / label_dir).iterdir(): + texts.append(text_file.read_text()) + labels.append(0 if label_dir == 'neg' else 1) + return texts, labels + + +def read_sst_file(sst_file): + texts, labels = [], [] + with open(sst_file) as f: + for line in f.readlines()[1:]: + if line: + text, label = line.strip().split(" \t") + 
texts.append(text) + labels.append(int(label)) + return texts, labels
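Finally, as a quick end-to-end sanity check of the Trainer-based path and the utilities above, a smoke-test run (which, as implemented here, caps training at a few steps and truncates the train/test samples) might be:

```
# Smoke test: a few training steps and a tiny prediction pass on SST-2, CPU only.
python ./src/run_pt.py \
    --model_name_or_path bert-base-uncased \
    --dataset sst2 \
    --smoke_test \
    --do_train --do_predict \
    --output_dir ./logs/smoke \
    --per_device_eval_batch_size 8 \
    --no_cuda
```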