enhance test infra functionality with special_tests.sh and optional experimental patch test masks
speediedan committed Sep 2, 2024
1 parent ea3afc4 commit 20f1f60
Showing 10 changed files with 405 additions and 127 deletions.
13 changes: 12 additions & 1 deletion .azure-pipelines/gpu-tests.yml
@@ -11,6 +11,7 @@ trigger:
include:
- "main"
- "release/*"
- "model_parallel_exp_support" # temporarily add for new test infra enhancement validation
- "refs/tags/*"
paths:
include:
@@ -86,11 +87,21 @@ jobs:
python -m coverage run --source src/finetuning_scheduler -m pytest src/finetuning_scheduler tests -v --junitxml=$(Build.Repository.LocalPath)/test-results.xml --durations=50
displayName: 'Testing: standard'
# - bash: |
# . /tmp/venvs/fts_dev/bin/activate
# bash ./tests/standalone_tests.sh -k test_f
# displayName: 'Testing: standalone multi-gpu'

- bash: |
. /tmp/venvs/fts_dev/bin/activate
bash ./tests/standalone_tests.sh -k test_f
bash ./tests/special_tests.sh --mark_type=standalone --filter_pattern='test_f'
displayName: 'Testing: standalone multi-gpu'
# - bash: |
# . /tmp/venvs/fts_dev/bin/activate
# bash ./tests/special_tests.sh --mark_type=exp_patch --filter_pattern='test_f' --experiment_patch_mask="1 0 0"
# displayName: 'Testing: experimental einsum patch'

- bash: |
. /tmp/venvs/fts_dev/bin/activate
python -m coverage report
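For local validation, the same `special_tests.sh` invocations can be run from the repo root; a minimal sketch, assuming the dev venv path from the pipeline config above:

```bash
. /tmp/venvs/fts_dev/bin/activate
bash ./tests/special_tests.sh --mark_type=standalone --filter_pattern='test_f'
# experimental einsum patch variant (kept commented out in CI above)
bash ./tests/special_tests.sh --mark_type=exp_patch --filter_pattern='test_f' --experiment_patch_mask="1 0 0"
```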
3 changes: 3 additions & 0 deletions tests/.experiments
@@ -0,0 +1,3 @@
ENABLE_FTS_EINSUM_STRATEGY_PATCH
ENABLE_FTS_NUMPY_EXTRACTOR_PATCH
ENABLE_FTS_TRITON_CODEGEN_PATCH
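Each line of `tests/.experiments` names one experimental patch flag. The mask semantics below are an assumption inferred from the commented CI step above: position i of `--experiment_patch_mask` toggles line i of this file, so a mask of `1 0 0` exports only the einsum strategy flag.

```bash
# assumed mapping: "1 0 0" => ENABLE_FTS_EINSUM_STRATEGY_PATCH=1; the numpy extractor
# and triton codegen flags remain unset
bash ./tests/special_tests.sh --mark_type=exp_patch --experiment_patch_mask="1 0 0"
```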
10 changes: 5 additions & 5 deletions tests/conftest.py
@@ -163,9 +163,10 @@ def single_process_pg():
os.environ.clear()
os.environ.update(orig_environ)


def pytest_collection_modifyitems(items):
# filter out special tests
# select special tests; all special tests run standalone
# note: standalone tests take precedence over experimental tests if both env vars are set
# tests depending on experimental patches do not run in CI by default
if os.getenv("PL_RUN_STANDALONE_TESTS", "0") == "1":
items[:] = [
item
@@ -174,11 +175,10 @@ def pytest_collection_modifyitems(items):
# has `@RunIf(standalone=True)`
if marker.name == "skipif" and marker.kwargs.get("standalone")
]
elif os.getenv("PL_RUN_SLOW_TESTS", "0") == "1":
elif os.getenv("FTS_EXPERIMENTAL_PATCH_TESTS", "0") == "1":
items[:] = [
item
for item in items
for marker in item.own_markers
# has `@RunIf(slow=True)`
if marker.name == "skipif" and marker.kwargs.get("slow")
if marker.name == "skipif" and marker.kwargs.get("exp_patch")
]
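With this change, special-test collection is gated by two environment variables rather than `PL_RUN_SLOW_TESTS`; a sketch of invoking each selection mode directly (pytest target paths taken from the CI step above):

```bash
# standalone tests only (takes precedence when both env vars are set)
PL_RUN_STANDALONE_TESTS=1 python -m pytest src/finetuning_scheduler tests -v
# experimental-patch tests only (replaces the removed PL_RUN_SLOW_TESTS mode)
FTS_EXPERIMENTAL_PATCH_TESTS=1 python -m pytest src/finetuning_scheduler tests -v
```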
2 changes: 1 addition & 1 deletion tests/helpers/common.py
@@ -53,7 +53,7 @@ def multiwarn_check(
unmatched_warns = partial(multiwarn_check, expected_mode=True)

class ExpectedResults(NamedTuple):
expected_state: Dict
expected_state: Optional[Dict] = None
warns_expected: Optional[Tuple] = None
exceptions_expected: Optional[Tuple] = None

31 changes: 16 additions & 15 deletions tests/helpers/runif.py
@@ -28,10 +28,12 @@
# RunIf aliases
RUNIF_MAP = {
"min2_5": {"min_torch": "2.5.0"},
"alone": {"standalone": True},
"bf16_cuda_alone": {"bf16_cuda": True, "standalone": True},
"min2_2": {"min_torch": "2.2.0"},
"max3_12_min2_3": {"max_python": "3.12", "min_torch": "2.3.0"},
"max3_12_min2_2": {"max_python": "3.12", "min_torch": "2.2.0"},
"einsum_exp": {"exp_patch": {ExpPatch.EINSUM_STRATEGIES}, "min_torch": "2.5.0"},
"einsum_exp": {"exp_patch": {ExpPatch.EINSUM_STRATEGIES}},
}


@@ -59,7 +61,6 @@ def __new__(
skip_mac_os: bool = False,
standalone: bool = False,
deepspeed: bool = False,
slow: bool = False,
exp_patch: Optional[ExpPatch|Set[ExpPatch]] = None,
**kwargs,
):
@@ -78,8 +79,6 @@ def __new__(
This requires that the ``PL_RUN_STANDALONE_TESTS=1`` environment variable is set.
deepspeed: Require that microsoft/DeepSpeed is installed.
exp_patch: Require that a given experimental patch is installed.
slow: Mark the test as slow, our CI will run it in a separate job.
This requires that the ``PL_RUN_SLOW_TESTS=1`` environment variable is set.
**kwargs: Any :class:`pytest.mark.skipif` keyword arguments.
"""
conditions = []
@@ -148,17 +147,19 @@ def __new__(
reasons.append("Deepspeed")

if exp_patch:
if not isinstance(exp_patch, Set):
exp_patch = {exp_patch}
conditions.append(not exp_patch.issubset(_ACTIVE_PATCHES))
reasons.append(f"Required experimental patch configuration {exp_patch} is not active.")

if slow:
env_flag = os.getenv("PL_RUN_SLOW_TESTS", "0")
conditions.append(env_flag != "1")
reasons.append("Slow test")
# used in tests/conftest.py::pytest_collection_modifyitems
kwargs["slow"] = True
# since we want to keep all experimental test combinations separate from normal unpatched tests, we
# gate experimental patches with both an environment flag and the required subset of active patches
env_flag = os.getenv("FTS_EXPERIMENTAL_PATCH_TESTS", "0")
if env_exp_flag := (env_flag != "1"):
conditions.append(env_exp_flag)
reasons.append("Experimental tests not enabled via 'FTS_EXPERIMENTAL_PATCH_TESTS' env variable")
else:
if not isinstance(exp_patch, Set):
exp_patch = {exp_patch}
conditions.append(not exp_patch.issubset(_ACTIVE_PATCHES))
reasons.append(f"Required experimental patch configuration {exp_patch} is not active.")
# used in conftest.py::pytest_collection_modifyitems
kwargs["exp_patch"] = True

reasons = [rs for cond, rs in zip(conditions, reasons) if cond]
return pytest.mark.skipif(
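Per the revised `RunIf` logic above, an `exp_patch`-marked test runs only when the env flag is set and the required patch set is active; a hedged sketch, where the mapping from the `ENABLE_FTS_*` flags in `tests/.experiments` to `_ACTIVE_PATCHES` members is an assumption:

```bash
export FTS_EXPERIMENTAL_PATCH_TESTS=1      # allow exp_patch tests to be selected at all
export ENABLE_FTS_EINSUM_STRATEGY_PATCH=1  # assumed to activate ExpPatch.EINSUM_STRATEGIES
python -m pytest tests -v
```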
192 changes: 192 additions & 0 deletions tests/infra_utils.sh
@@ -0,0 +1,192 @@
#!/bin/bash
# Test infra utility functions
# Note: we use local variables in many of these functions to allow more flexible usage in different contexts

toggle_experimental_patches() {
# Function to encapsulate toggling of the current FTS experimental patch flags on and off. Usage example:
# toggle_experimental_patches /path/to/.experiments 1 0 1
export patch_report=''
filepath="$1"
shift

declare -a exp_patch_flags=($(cat "$filepath"))
declare -a patch_mask=("$@")

if [[ ${#exp_patch_flags[@]} -ne ${#patch_mask[@]} ]]; then
echo "Error: There are currently ${#exp_patch_flags[@]} defined experiments, provided mask should have that length." >&2
return 1
fi

for i in "${!exp_patch_flags[@]}"; do
if [[ ${patch_mask[$i]} -eq 1 ]]; then
export "${exp_patch_flags[$i]}"=1
patch_report+="${exp_patch_flags[$i]} value is now: ${!exp_patch_flags[$i]}\n"
else
unset "${exp_patch_flags[$i]}"
fi
done
}
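A usage sketch of the helper above, following its docstring example; `patch_report` accumulates one line per enabled flag:

```bash
source tests/infra_utils.sh
toggle_experimental_patches tests/.experiments 1 0 1
printf "%b" "$patch_report"
# ENABLE_FTS_EINSUM_STRATEGY_PATCH value is now: 1
# ENABLE_FTS_TRITON_CODEGEN_PATCH value is now: 1
```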

collect_tests(){
local collect_def="$1"
local collect_log="$2"
if special_tests=$(python3 ${collect_def}); then
# match only lines with tests
declare -a -g parameterizations=($(grep -oP '\S+::test_\S+' <<< "$special_tests"))
echo `printf "%0.s-" {1..120} && printf "\n"` | tee -a $collect_log
printf "Collected the following tests: \n" | tee -a $collect_log
echo `printf "%0.s-" {1..120} && printf "\n"` | tee -a $collect_log
printf '%s\n' "${parameterizations[@]}" | tee -a $collect_log
num_collected_tests="${#parameterizations[@]}"
echo "Total number of tests: ${#parameterizations[@]}" | tee -a $collect_log
printf '\n' | tee -a $collect_log
else
printf "No tests were found with the following collection command: python3 ${collect_def} \n" | tee -a $collect_log
printf "Exiting without running tests. \n" | tee -a $collect_log
export no_tests_collected=1
exit 0
fi
}
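`collect_tests` only requires that `${collect_def}` print pytest node ids matching `\S+::test_\S+`; a hypothetical stand-in using pytest's quiet collect-only mode:

```bash
# hypothetical ${collect_def} equivalent: emits one "<path>::test_..." node id per line
python3 -m pytest tests --collect-only -q -k "test_f"
```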

execute_tests(){
ensure_tests
local execute_def="$1"
local execute_log="$2"
local tmp_out="$3"
# hardcoded tests to skip - space separated
blocklist=''
export report=''
echo `printf "%0.s-" {1..120} && printf "\n"` | tee -a $execute_log
printf "Running the collected tests: \n" | tee -a $execute_log
echo `printf "%0.s-" {1..120} && printf "\n"` | tee -a $execute_log

for i in "${!parameterizations[@]}"; do
parameterization=${parameterizations[$i]}

# check blocklist
if echo $blocklist | grep -F "${parameterization}"; then
report+="Skipped\t$parameterization\n"
continue
fi

# run the test
echo "Running ${parameterization}" | tee -a $execute_log
(python ${execute_def} ${parameterization} 2>&1 | sed "s,\x1b\[[0-9;]*[a-zA-Z],,g" >> $tmp_out) > /dev/null
test_to_find=`echo ${parameterization} | sed 's/\[/\\\[/g; s/\]/\\\]/g'`
if pass_or_fail=$(grep -E "(PASSED|FAILED|XPASS|XFAIL) .*${test_to_find}" $tmp_out); then
parameterization_result=`echo $pass_or_fail | awk 'NR==1 {print $2 ": " $1}'`;
elif skipped=$(grep -E "${test_to_find}.*SKIPPED" $tmp_out); then
parameterization_result=`echo $skipped | awk 'NR==1 {print $1 ": " $2}'`;
else
echo "Could not parse result!" | tee -a $execute_log
parameterization_result="UNKNOWN: see $tmp_out"
fi
report+="Ran\t${parameterization_result}\n"
done
}

show_test_counts(){
local test_log="$1"
export num_failed=0
export num_other=0
if grep_succ=($(printf "$report" | grep -c "PASSED\|XPASSED\|XFAIL")); then num_succ=$grep_succ; else num_succ=0; fi
if grep_failed=($(printf "$report" | grep -c "FAILED")); then num_failed=$grep_failed; fi
if grep_skipped=($(printf "$report" | grep -c "SKIPPED")); then num_skipped=$grep_skipped; else num_skipped=0; fi
printf "\n" | tee -a $test_log
echo `printf "%0.s-" {1..120} && printf "\n"` | tee -a $test_log
printf "Test count summary: \n" | tee -a $test_log
echo `printf "%0.s-" {1..120} && printf "\n"` | tee -a $test_log
printf "Collected: $num_collected_tests \n" | tee -a $test_log
printf "Succeeded (passed+xpassed+xfail): $num_succ \n" | tee -a $test_log
printf "Intentionally skipped: $num_skipped \n" | tee -a $test_log
printf "Failed: $num_failed \n" | tee -a $test_log
num_other=$(($num_collected_tests - $num_succ - $num_failed - $num_skipped))
if [ $num_other -gt 0 ]; then
printf "Other (usually tests skipped due to prior test failure): $num_other \n" | tee -a $test_log
fi
printf '\n' | tee -a $test_log
}

show_summary(){
local test_log="$1"
# summarize test report
echo `printf "%0.s-" {1..120} && printf "\n"` | tee -a $test_log
printf "Finished Tests: \n" | tee -a $test_log
echo `printf "%0.s-" {1..120} && printf "\n"` | tee -a $test_log
printf "$report" | tee -a $test_log
}

show_final_summary(){
local test_log="$1"
local tmp_out="${2:-}"
show_summary "$test_log"
show_test_counts "$test_log"
show_elapsed_time "$test_log"
exit_with_status "$test_log"
}

exit_with_status(){
local test_log="$1"
exit_code=0
if [ $num_failed -gt 0 ] || [ $num_other -gt 0 ]; then
exit_code=1
printf "**Failure (${num_failed}) or other (${num_other}) test counts were greater than 0**! \n" | tee -a $test_log
else
printf "Failure (${num_failed}) and other (${num_other}) test counts were not greater than 0. \n" | tee -a $test_log
fi
printf "Exiting with status code ${exit_code}. \n" | tee -a $test_log
exit $exit_code
}

ensure_tests(){
if [ -n "$no_tests_collected" ]; then
exit 0
fi
}

show_test_results(){
ensure_tests
local test_log="$1"
local tmp_out="$2"
if [ -f ${tmp_out} ]; then
if grep_errors=($(grep --ignore-case --extended-regexp 'error|exception|traceback|failed' ${tmp_out})); then
echo `printf "%0.s-" {1..120} && printf "\n"` | tee -a $test_log
printf "Potential errors detected. See ${tmp_out} for details. Exception/error lines to follow. \n" | tee -a $test_log
echo `printf "%0.s-" {1..120} && printf "\n"` | tee -a $test_log
printf "\n" | tee -a $test_log
show_final_summary "$test_log"
echo `printf "%0.s-" {1..120} && printf "\n"` | tee -a $test_log
printf "Grepped exception/error lines: \n" | tee -a $test_log
echo `printf "%0.s-" {1..120} && printf "\n"` | tee -a $test_log
grep --ignore-case --extended-regexp 'error|exception' ${tmp_out} | tee -a $test_log
printf "\n" | tee -a $test_log
else
printf "No detected errors. \n" | tee -a $test_log
printf "\n" | tee -a $test_log
show_final_summary "$test_log"
fi
elif [ -f ${test_log} ]; then # if the log but not the out exists, check for collection errors
if grep --ignore-case --extended-regexp 'traceback|failed' ${test_log} ; then
echo "Potential collection error!" | tee -a $test_log
show_final_summary "$test_log"
exit 1
fi
fi
}

show_elapsed_time(){
local test_log="$1"
script_name=${2:-$(basename "$0")}
## write elapsed time in user-friendly fashion
end_time=$(date +%s)
elapsed_seconds=$(($end_time-$start_time))
if (( $elapsed_seconds/60 == 0 )); then
printf "${script_name} completed in $elapsed_seconds seconds \n" | tee -a $test_log
elif (( $elapsed_seconds%60 == 0 )); then
printf "${script_name} completed in $(($elapsed_seconds/60)) minutes \n" | tee -a $test_log
else
printf "${script_name} completed in $(($elapsed_seconds/60)) minutes and $(($elapsed_seconds%60)) seconds \n" | tee -a $test_log
fi
printf "\n" | tee -a $test_log
}
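Taken together, a hedged sketch of how a driver script such as `special_tests.sh` (not shown in this diff) might chain these helpers; the `collect_def`/`execute_def` values and log paths are hypothetical:

```bash
#!/bin/bash
source tests/infra_utils.sh
start_time=$(date +%s)                  # consumed by show_elapsed_time
test_log="/tmp/special_tests.log"
tmp_out="/tmp/special_tests.out"
collect_def="tests/collect_special.py"  # hypothetical script printing <path>::test_* node ids
execute_def="-m pytest -v"              # hypothetical per-test pytest invocation
collect_tests "$collect_def" "$test_log"
execute_tests "$execute_def" "$test_log" "$tmp_out"
show_test_results "$test_log" "$tmp_out"
```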