Skip to content

Commit

Permalink
Merge branch 'main' into nlp/cache-start-pos
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jack-Khuu authored Jan 23, 2025
2 parents (1a3af81 + 3ce9c8e), commit 36dc78b
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 44 deletions.
74 changes: 32 additions & 42 deletions .github/workflows/run-readme-pr-linuxaarch64.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,20 @@ on:

jobs:
test-readme-cpu:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
with:
runner: linux-aarch64
gpu-arch-type: cuda
gpu-arch-version: "12.1"
runner: linux.arm64.2xlarge
docker-image: "pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main"
gpu-arch-type: cpu-aarch64
timeout: 60
script: |
echo "::group::Print machine info"
uname -a
echo "::endgroup::"
echo "::group::Install newer objcopy that supports --set-section-alignment"
yum install -y devtoolset-10-binutils
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
echo "::endgroup::"
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs readme
echo "::group::Completion"
Expand All @@ -33,41 +31,37 @@ jobs:
echo "::endgroup::"
test-quantization-cpu:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
contents: read
with:
runner: linux-aarch64
gpu-arch-type: cuda
gpu-arch-version: "12.1"
runner: linux.arm64.2xlarge
docker-image: "pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main"
gpu-arch-type: cpu-aarch64
timeout: 60
script: |
echo "::group::Print machine info"
uname -a
echo "::endgroup::"
echo "::group::Install newer objcopy that supports --set-section-alignment"
yum install -y devtoolset-10-binutils
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
echo "::endgroup::"
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs quantization
test-gguf-cpu:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
permissions:
id-token: write
contents: read
with:
runner: linux-aarch64
gpu-arch-type: cuda
gpu-arch-version: "12.1"
runner: linux.arm64.2xlarge
docker-image: "pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main"
gpu-arch-type: cpu-aarch64
timeout: 60
script: |
echo "::group::Print machine info"
uname -a
echo "::endgroup::"
echo "::group::Install newer objcopy that supports --set-section-alignment"
yum install -y devtoolset-10-binutils
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
echo "::endgroup::"
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs gguf
echo "::group::Completion"
Expand All @@ -77,21 +71,19 @@ jobs:
test-advanced-cpu:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
permissions:
id-token: write
contents: read
with:
runner: linux-aarch64
gpu-arch-type: cuda
gpu-arch-version: "12.1"
runner: linux.arm64.2xlarge
docker-image: "pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main"
gpu-arch-type: cpu-aarch64
timeout: 60
script: |
echo "::group::Print machine info"
uname -a
echo "::endgroup::"
echo "::group::Install newer objcopy that supports --set-section-alignment"
yum install -y devtoolset-10-binutils
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
echo "::endgroup::"
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs advanced
echo "::group::Completion"
Expand All @@ -101,21 +93,19 @@ jobs:
test-evaluation-cpu:
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
permissions:
id-token: write
contents: read
with:
runner: linux-aarch64
gpu-arch-type: cuda
gpu-arch-version: "12.1"
runner: linux.arm64.2xlarge
docker-image: "pytorch/manylinux2_28_aarch64-builder:cpu-aarch64-main"
gpu-arch-type: cpu-aarch64
timeout: 60
script: |
echo "::group::Print machine info"
uname -a
echo "::endgroup::"
echo "::group::Install newer objcopy that supports --set-section-alignment"
yum install -y devtoolset-10-binutils
export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
echo "::endgroup::"
TORCHCHAT_DEVICE=cpu .ci/scripts/run-docs evaluation
echo "::group::Completion"
Expand Down
4 changes: 2 additions & 2 deletions torchchat/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,7 @@ def generate(
sequential_prefill=True,
callback=lambda x: x,
max_seq_length: int,
attention_backend: str = "math",
attention_backend: SDPBackend = torch.nn.attention.SDPBackend.MATH,
seed: Optional[int] = None,
**sampling_kwargs,
) -> torch.Tensor:
Expand Down Expand Up @@ -1126,7 +1126,7 @@ def chat(
messages_to_encode.append(
{"role": "system", "content": self.system_prompt}
)
messages_to_encode.append({"role": "system", "content": prompt})
messages_to_encode.append({"role": "user", "content": prompt})
encoded = self.chat_formatter.encode_dialog_prompt(
messages_to_encode, add_generation_prompt=True,
)
Expand Down

0 comments on commit 36dc78b

Please sign in to comment.