Skip to content

Commit

Permalink
update vso upload directive to attempt to work around an az pipelines bug, u…
Browse files Browse the repository at this point in the history
…pdate test docker image to use PyTorch 20240827 nightly to allow initial model parallel testing
  • Loading branch information
speediedan committed Sep 2, 2024
1 parent 1aff16b commit 98865a0
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 17 deletions.
2 changes: 1 addition & 1 deletion dockers/base-cuda/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ RUN \
# ... pytorch patch version
# pip install torch==1.11.1+cu113 torchvision==0.11.3+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html; \
# ... pytorch nightly dev version
pip install --pre torch==2.5.0.dev20240814 torchvision==0.20.0.dev20240814 --index-url https://download.pytorch.org/whl/nightly/cu124; \
pip install --pre torch==2.5.0.dev20240827 torchvision==0.20.0.dev20240827 --index-url https://download.pytorch.org/whl/nightly/cu124; \
# ... test channel
#pip install --pre torch==2.4.0 torchvision --index-url https://download.pytorch.org/whl/test/cu124; \
fi && \
Expand Down
26 changes: 13 additions & 13 deletions tests/infra_utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ execute_tests(){
ensure_tests
local execute_def="$1"
local execute_log="$2"
local tmp_out="$3"
local tmp_raw_log="$3"
# hardcoded tests to skip - space separated
blocklist=''
export report=''
Expand All @@ -72,15 +72,15 @@ execute_tests(){

# run the test
echo "Running ${parameterization}" | tee -a $execute_log
(python ${execute_def} ${parameterization} 2>&1 | sed "s,\x1b\[[0-9;]*[a-zA-Z],,g" >> $tmp_out) > /dev/null
(python ${execute_def} ${parameterization} 2>&1 | sed "s,\x1b\[[0-9;]*[a-zA-Z],,g" >> $tmp_raw_log) > /dev/null
test_to_find=`echo ${parameterization} | sed 's/\[/\\\[/g; s/\]/\\\]/g'`
if pass_or_fail=$(grep -E "(PASSED|FAILED|XPASS|XFAIL) .*${test_to_find}" $tmp_out); then
if pass_or_fail=$(grep -E "(PASSED|FAILED|XPASS|XFAIL) .*${test_to_find}" $tmp_raw_log); then
parameterization_result=`echo $pass_or_fail | awk 'NR==1 {print $2 ": " $1}'`;
elif skipped=$(grep -E "${test_to_find}.*SKIPPED" $tmp_out); then
elif skipped=$(grep -E "${test_to_find}.*SKIPPED" $tmp_raw_log); then
parameterization_result=`echo $skipped | awk 'NR==1 {print $1 ": " $2}'`;
else
echo "Could not parse result!" | tee -a $execute_log
parameterization_result="UNKNOWN: see $tmp_out"
parameterization_result="UNKNOWN: see $tmp_raw_log"
fi
report+="Ran\t${parameterization_result}\n"
done
Expand Down Expand Up @@ -119,7 +119,7 @@ show_summary(){

show_final_summary(){
local test_log="$1"
local tmp_out="${2:-}"
local tmp_raw_log="${2:-}"
show_summary "$test_log"
show_test_counts "$test_log"
show_elapsed_time "$test_log"
Expand Down Expand Up @@ -148,23 +148,23 @@ ensure_tests(){
show_test_results(){
ensure_tests
local test_log="$1"
local tmp_out="$2"
if [ -f ${tmp_out} ]; then
if grep_errors=($(grep --ignore-case --extended-regexp 'error|exception|traceback|failed' ${tmp_out})); then
local tmp_raw_log="$2"
if [ -f ${tmp_raw_log} ]; then
if grep_errors=($(grep --ignore-case --extended-regexp 'error|exception|traceback|failed' ${tmp_raw_log})); then
echo `printf "%0.s-" {1..120} && printf "\n"` | tee -a $test_log
printf "Potential errors detected. Uploading ${tmp_out} and grepping exception/error lines below: \n" | tee -a $test_log
printf "Potential errors detected. Uploading ${tmp_raw_log} and grepping exception/error lines below: \n" | tee -a $test_log
echo `printf "%0.s-" {1..120} && printf "\n"` | tee -a $test_log
printf ": \n" | tee -a $test_log
echo "##vso[task.uploadfile]${tmp_out}"
grep --ignore-case --extended-regexp 'error|exception' ${tmp_out} | tee -a $test_log
echo "##vso[task.uploadfile]$tmp_raw_log"
grep --ignore-case --extended-regexp 'error|exception' ${tmp_raw_log} | tee -a $test_log
printf "\n" | tee -a $test_log
show_final_summary "$test_log"
else
printf "No detected errors. \n" | tee -a $test_log
printf "\n" | tee -a $test_log
show_final_summary "$test_log"
fi
elif [ -f ${test_log} ]; then # if the log but not the out exists, check for collection errors
elif [ -f ${test_log} ]; then # if the summary log but not the raw test log exists, check for collection errors
if grep --ignore-case --extended-regexp 'traceback|failed' ${test_log} ; then
echo "Potential collection error!" | tee -a $test_log
show_final_summary "$test_log"
Expand Down
6 changes: 3 additions & 3 deletions tests/special_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ if [ -s "${experiments_list}" ]; then
fi
fi
special_test_session_log=${log_file:-"${tmp_log_dir}/special_tests_${mark_type}_${d}.log"}
test_session_tmp_out="${tmp_log_dir}/special_tests_${mark_type}_${d}.out"
test_session_tmp_log="${tmp_log_dir}/special_tests_${mark_type}_${d}.log"

# default python coverage arguments
exec_defaults='-m coverage run --source src/finetuning_scheduler --append -m pytest --capture=no --no-header -v -s -rA'
Expand Down Expand Up @@ -127,9 +127,9 @@ define_configuration(){
printf '\n' | tee -a $special_test_session_log
}

trap 'show_test_results "$special_test_session_log" "$test_session_tmp_out"' EXIT # show the output on exit
trap 'show_test_results "$special_test_session_log" "$test_session_tmp_log"' EXIT # show the output on exit

## Special coverage collection flow
define_configuration
collect_tests "$collect_defaults" "$special_test_session_log"
execute_tests "$exec_defaults" "$special_test_session_log" "$test_session_tmp_out"
execute_tests "$exec_defaults" "$special_test_session_log" "$test_session_tmp_log"

0 comments on commit 98865a0

Please sign in to comment.