# Skip to content
#
# Introduce 6-bit quantization for Llama in torchchat #116
#
# Introduce 6-bit quantization for Llama in torchchat
#
# Introduce 6-bit quantization for Llama in torchchat #116

# Workflow that builds macOS arm64 (M1) wheels through the shared
# pytorch/test-infra reusable workflows.
name: Build M1 Wheels

on:
  # Pull requests trigger a build only when they touch one of these paths.
  # NOTE(review): the build job's pre-script is packaging/pre_build_script.sh,
  # which `build/packaging/**` does not match - confirm whether `packaging/**`
  # was intended here.
  pull_request:
    paths:
      - build/packaging/**
      - .github/workflows/build_wheels_m1.yml
      - setup.py
  push:
    branches:
      - nightly
      - main
      - release/*
    tags:
      # NOTE: Binary build pipelines should only get triggered on release candidate builds
      # Release candidate tags look like: v1.11.0-rc1
      - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
  schedule:
    - cron: '0 0 * * *'  # Runs at midnight UTC every day
  # Allows the workflow to be started manually; no inputs are declared.
  workflow_dispatch:
# Scopes granted to the workflow's GITHUB_TOKEN.
permissions:
  # Lets jobs request an OIDC token.
  id-token: write
  # Read-only access to repository contents.
  contents: read
jobs:
  # Calls the shared test-infra workflow to generate the build matrix for
  # wheel packages on macos-arm64.
  generate-matrix:
    uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
    with:
      package-type: wheel
      os: macos-arm64

  # Hands the generated matrix to the shared macOS wheel-build workflow.
  build:
    needs: generate-matrix
    # Only run when the repository is owned by 'pytorch' (skips forks).
    if: github.repository_owner == 'pytorch'
    name: pytorch/ao
    uses: pytorch/test-infra/.github/workflows/build_wheels_macos.yml@main
    with:
      repository: pytorch/ao
      # github.head_ref is only set for pull_request events; every other
      # event falls back to the branch or tag name that triggered the run.
      ref: ${{ github.head_ref || github.ref_name }}
      build-matrix: ${{ needs.generate-matrix.outputs.matrix }}
      pre-script: packaging/pre_build_script.sh
      # Post-build script is currently disabled.
      # post-script: packaging/post_build_script.sh
      package-name: torchao
      runner-type: macos-m1-stable
      smoke-test-script: test/smoke_test.py
      trigger-event: ${{ github.event_name }}