intel-analytics · ivy-lv11 · May 20, 2024 · May 20, 2024
diff --git a/.github/workflows/publish_sub_package.yml b/.github/workflows/publish_sub_package.yml
@@ -4,14 +4,18 @@ on:
   push:
     branches:
       - main
+  pull_request:  # sibling of push; runs for PRs into main that touch the paths below
+    branches: [main]
+    paths:
+      - "llama-index-integrations/**"
 
 env:
   POETRY_VERSION: "1.6.1"
   PYTHON_VERSION: "3.10"
 
 jobs:
   publish_subpackage_if_needed:
-    if: github.repository == 'run-llama/llama_index'
+    # if: github.repository == 'run-llama/llama_index'  # NOTE(review): repository guard disabled; restore before merging upstream
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
@@ -30,14 +34,11 @@ jobs:
         run: |
           echo "changed_files=$(git diff --name-only ${{ github.event.before }} ${{ github.event.after }} | grep -v llama-index-core | grep llama-index | grep pyproject | xargs)" >> $GITHUB_OUTPUT
       - name: Publish changed packages
-        env:
-          PYPI_TOKEN: ${{ secrets.LLAMA_INDEX_PYPI_TOKEN }}
         run: |
-          for file in ${{ steps.changed-files.outputs.changed_files }}; do
+          for file in llama-index-integrations/llms/llama-index-llms-ipex-llm/pyproject.toml; do  # NOTE(review): hard-coded path replaces the changed-files step output
               cd `echo $file | sed 's/\/pyproject.toml//g'`
               poetry lock
               pip install -e .
-              poetry config pypi-token.pypi $PYPI_TOKEN
-              poetry publish --build
+              poetry publish --build --dry-run  # NOTE(review): token setup was dropped; --dry-run is expected to skip the upload (confirm in poetry publish docs)
               cd -
           done
diff --git a/llama-index-integrations/llms/llama-index-llms-ipex-llm/examples/more_data_type.py b/llama-index-integrations/llms/llama-index-llms-ipex-llm/examples/more_data_type.py
@@ -33,11 +33,20 @@ def completion_to_prompt(completion):
         choices=["sym_int4", "asym_int4", "sym_int5", "asym_int5", "sym_int8"],
         help="The quantization type the model will convert to.",
     )
+    parser.add_argument(  # --device / -d: the parsed value is passed to IpexLLM.from_model_id as device_map
+        "--device",
+        "-d",
+        type=str,
+        default="xpu",  # NOTE(review): presumably needs the optional "xpu" extra installed; confirm this default is intended
+        choices=["cpu", "xpu", "auto"],
+        help="The device the model will run on.",
+    )
 
     args = parser.parse_args()
     model_name = args.model_name
     tokenizer_name = args.tokenizer_name
     low_bit = args.low_bit
+    device = args.device  # forwarded to IpexLLM.from_model_id as device_map
 
     # load the model using low-bit format specified
     llm = IpexLLM.from_model_id(
@@ -48,6 +57,7 @@ def completion_to_prompt(completion):
         load_in_low_bit=low_bit,
         completion_to_prompt=completion_to_prompt,
         generate_kwargs={"do_sample": False},
+        device_map=device,
     )
 
     print(

diff --git a/llama-index-integrations/llms/llama-index-llms-ipex-llm/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-ipex-llm/pyproject.toml
@@ -35,8 +35,15 @@ version = "0.1.2"
 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"
 llama-index-core = "^0.10.0"
-torch = "<2.2.0"
-ipex-llm = {allow-prereleases = true, extras = ["all"], version = "*"}
+ipex-llm = {allow-prereleases = true, extras = ["llama-index"], version = ">=2.1.0b20240514"}  # pre-releases allowed; requests the "llama-index" extra
+torch = {optional = true, source = "ipex-xpu-src-us", version = "2.1.0a0"}  # optional: selected by the "xpu" extra; resolved from ipex-xpu-src-us
+torchvision = {optional = true, source = "ipex-xpu-src-us", version = "0.16.0a0"}  # optional: selected by the "xpu" extra; resolved from ipex-xpu-src-us
+intel_extension_for_pytorch = {optional = true, source = "ipex-xpu-src-us", version = "2.1.10+xpu"}  # optional: selected by the "xpu" extra; resolved from ipex-xpu-src-us
+bigdl-core-xe-21 = {optional = true, version = "*"}  # optional: selected by the "xpu" extra; no source pinned
+bigdl-core-xe-esimd-21 = {optional = true, version = "*"}  # optional: selected by the "xpu" extra; no source pinned
+
+[tool.poetry.extras]
+xpu = ["bigdl-core-xe-21", "bigdl-core-xe-esimd-21", "intel_extension_for_pytorch", "torch", "torchvision"]  # opt-in group covering every optional dependency declared in this hunk
 
 [tool.poetry.group.dev.dependencies]
 black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"}
@@ -56,3 +63,13 @@ types-protobuf = "^4.24.0.4"
 types-redis = "4.5.5.0"
 types-requests = "2.28.11.8"  # TODO: unpin when mypy>0.991
 types-setuptools = "67.1.0.0"
+
+[[tool.poetry.source]]
+name = "ipex-xpu-src-us"
+priority = "explicit"  # NOTE(review): per Poetry docs, consulted only for dependencies that set source = "ipex-xpu-src-us" (torch, torchvision, intel_extension_for_pytorch)
+url = "https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
+
+[[tool.poetry.source]]
+name = "ipex-xpu-src-cn"
+priority = "supplemental"  # NOTE(review): no dependency in this change names this source; confirm the fallback mirror is intended
+url = "https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/"