-
Notifications
You must be signed in to change notification settings - Fork 209
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Fix run_tutorials code. Summary: The last script actually had some errors but didn't error out; this PR adds the logic for the CI job to report an error when any job fails, and also fixes the remaining code. Test Plan: CI. Reviewers: Subscribers: Tasks: Tags: * checking status code * script * more logs * tensor parallel check * change tp file check condition * deps * testing failing * update loop * try again * try again * done * restore * remove extra print
- Loading branch information
1 parent
d57704c
commit 12a58cf
Showing
5 changed files
with
37 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ lm_eval | |
diskcache | ||
pycocotools | ||
tqdm | ||
importlib_metadata | ||
|
||
# Custom CUDA Extensions | ||
ninja | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,37 @@ | ||
#!/bin/bash | ||
find . -type d | while read dir; do | ||
FAILED=0 | ||
for dir in $(find . -type d); do | ||
if [ -f "$dir/run.sh" ]; then | ||
echo "Running: $dir/run.sh" | ||
pushd "$dir" | ||
CURRENT_DIR=$(pwd) | ||
cd "$dir" | ||
bash run.sh | ||
popd | ||
cd "$CURRENT_DIR" | ||
else | ||
find "$dir" -maxdepth 1 -name "*.py" | while read file; do | ||
if [[ "$file" == *"tensor_parallel"* ]]; then | ||
for file in $(find "$dir" -maxdepth 1 -name "*.py"); do | ||
filename=$(basename "$file") | ||
if echo "$filename" | grep -q "tensor_parallel"; then | ||
echo "Running: torchrun --standalone --nnodes=1 --nproc-per-node=1 $file" | ||
torchrun --standalone --nnodes=1 --nproc-per-node=4 "$file" | ||
STATUS=$? | ||
else | ||
echo "Running: python $file" | ||
python "$file" | ||
STATUS=$? | ||
fi | ||
|
||
if [ $STATUS -ne 0 ]; then | ||
FAILED=1 | ||
echo "Test failed: $file" | ||
fi | ||
done | ||
fi | ||
done | ||
|
||
if [ "$FAILED" -eq 1 ]; then | ||
echo "One or more tests failed" | ||
exit 1 | ||
else | ||
echo "All tests passed" | ||
exit 0 | ||
fi |