From 95ca6174ce0b42266ae64b67629fed82294a27b1 Mon Sep 17 00:00:00 2001
From: Marshall Ward <marshall.ward@noaa.gov>
Date: Thu, 17 Aug 2023 16:12:00 -0400
Subject: [PATCH] CI: Improve test.summary rule

The test.summary rule was causing errors on our Lustre filesystem, even
when all tests were successful.  While the reason is not yet clear, I
suspect it is related to our `if ls results/*` tests, which may not
behave as intended on all filesystems.

To somewhat secure us against this, I have replaced the top-level check
with `if [ -d results ]` to check whether there is any output from failed
tests.  The subdirectory tests still remain, but at least these will
only happen if any actual results exist.

Other minor changes:

- The script to generate the summary was moved out of the Makefile and
  into a separate script.

- Unrelated to these changes, error output was extended from 20 to 40
  lines, to provide more readable backtrace output.
---
 .testing/Makefile                     | 33 +++++----------------
 .testing/tools/report_test_results.sh | 42 +++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 26 deletions(-)
 create mode 100755 .testing/tools/report_test_results.sh

diff --git a/.testing/Makefile b/.testing/Makefile
index b877ecb5f2..d6b06893fe 100644
--- a/.testing/Makefile
+++ b/.testing/Makefile
@@ -554,8 +554,8 @@ $(WORKSPACE)/work/%/$(1)/ocean.stats $(WORKSPACE)/work/%/$(1)/chksum_diag: build
 	  && $(TIME) $(5) $(MPIRUN) -n $(6) $(abspath $$<) 2> std.err > std.out \
 	  || !( \
 	    mkdir -p ../../../results/$$*/ ; \
-	    cat std.out | tee ../../../results/$$*/std.$(1).out | tail -n 20 ; \
-	    cat std.err | tee ../../../results/$$*/std.$(1).err | tail -n 20 ; \
+	    cat std.out | tee ../../../results/$$*/std.$(1).out | tail -n 40 ; \
+	    cat std.err | tee ../../../results/$$*/std.$(1).err | tail -n 40 ; \
 	    rm ocean.stats chksum_diag ; \
 	    echo -e "$(FAIL): $$*.$(1) failed at runtime." \
 	  )
@@ -630,8 +630,8 @@ $(WORKSPACE)/work/%/restart/ocean.stats: build/symmetric/MOM6 | preproc
 	# Run the first half-period
 	cd $(@D) && $(TIME) $(MPIRUN) -n 1 $(abspath $<) 2> std1.err > std1.out \
 	  || !( \
-	    cat std1.out | tee ../../../results/$*/std.restart1.out | tail -n 20 ; \
-	    cat std1.err | tee ../../../results/$*/std.restart1.err | tail -n 20 ; \
+	    cat std1.out | tee ../../../results/$*/std.restart1.out | tail -n 40 ; \
+	    cat std1.err | tee ../../../results/$*/std.restart1.err | tail -n 40 ; \
 	    echo -e "$(FAIL): $*.restart failed at runtime." \
 	  )
 	# Setup the next inputs
@@ -641,8 +641,8 @@ $(WORKSPACE)/work/%/restart/ocean.stats: build/symmetric/MOM6 | preproc
 	# Run the second half-period
 	cd $(@D) && $(TIME) $(MPIRUN) -n 1 $(abspath $<) 2> std2.err > std2.out \
 	  || !( \
-	    cat std2.out | tee ../../../results/$*/std.restart2.out | tail -n 20 ; \
-	    cat std2.err | tee ../../../results/$*/std.restart2.err | tail -n 20 ; \
+	    cat std2.out | tee ../../../results/$*/std.restart2.out | tail -n 40 ; \
+	    cat std2.err | tee ../../../results/$*/std.restart2.err | tail -n 40 ; \
 	    echo -e "$(FAIL): $*.restart failed at runtime." \
 	  )
 
@@ -652,26 +652,7 @@ $(WORKSPACE)/work/%/restart/ocean.stats: build/symmetric/MOM6 | preproc
 # Not a true rule; only call this after `make test` to summarize test results.
 .PHONY: test.summary
 test.summary:
-	@if ls $(WORKSPACE)/results/*/* &> /dev/null; then \
-	  if ls $(WORKSPACE)/results/*/std.*.err &> /dev/null; then \
-	    echo "The following tests failed to complete:" ; \
-	    ls $(WORKSPACE)/results/*/std.*.out \
-	      | awk '{split($$0,a,"/"); split(a[3],t,"."); v=t[2]; if(length(t)>3) v=v"."t[3]; print a[2],":",v}'; \
-	  fi; \
-	  if ls $(WORKSPACE)/results/*/ocean.stats.*.diff &> /dev/null; then \
-	    echo "The following tests report solution regressions:" ; \
-	    ls $(WORKSPACE)/results/*/ocean.stats.*.diff \
-	      | awk '{split($$0,a,"/"); split(a[3],t,"."); v=t[3]; if(length(t)>4) v=v"."t[4]; print a[2],":",v}'; \
-	  fi; \
-	  if ls $(WORKSPACE)/results/*/chksum_diag.*.diff &> /dev/null; then \
-	    echo "The following tests report diagnostic regressions:" ; \
-	    ls $(WORKSPACE)/results/*/chksum_diag.*.diff \
-	      | awk '{split($$0,a,"/"); split(a[3],t,"."); v=t[2]; if(length(t)>3) v=v"."t[3]; print a[2],":",v}'; \
-	  fi; \
-	  false ; \
-	else \
-	  echo -e "$(PASS): All tests passed!"; \
-	fi
+	@./tools/report_test_results.sh $(WORKSPACE)/results
 
 
 #---
diff --git a/.testing/tools/report_test_results.sh b/.testing/tools/report_test_results.sh
new file mode 100755
index 0000000000..24bab45507
--- /dev/null
+++ b/.testing/tools/report_test_results.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+RESULTS=${1:-${PWD}/results}
+
+GREEN="\033[0;32m"
+RESET="\033[0m"
+PASS="${GREEN}PASS${RESET}"
+
+if [ -d ${RESULTS} ]; then
+  if ls ${RESULTS}/*/std.*.err &> /dev/null; then
+    echo "The following tests failed to complete:"
+	ls ${RESULTS}/*/std.*.out \
+      | awk '{ \
+        split($$0,a,"/"); \
+        split(a[length(a)],t,"."); \
+        v=t[2]; \
+        if(length(t)>4) v=v"."t[4]; print a[length(a)-1],":",v}'
+  fi
+
+  if ls ${RESULTS}/*/ocean.stats.*.diff &> /dev/null; then
+    echo "The following tests report solution regressions:"
+    ls ${RESULTS}/*/ocean.stats.*.diff \
+      | awk '{ \
+        split($$0,a,"/"); \
+        split(a[length(a)],t,"."); \
+        v=t[3]; \
+        if(length(t)>4) v=v"."t[4]; print a[length(a)-1],":",v}'
+  fi
+
+  if ls ${RESULTS}/*/chksum_diag.*.diff &> /dev/null; then
+    echo "The following tests report diagnostic regressions:"
+    ls ${RESULTS}/*/chksum_diag.*.diff \
+      | awk '{ \
+        split($$0,a,"/"); \
+        split(a[length(a)],t,"."); \
+        v=t[2]; \
+        if(length(t)>4) v=v"."t[4]; print a[length(a)-1],":",v}'
+  fi
+
+  exit 1
+else
+  printf "${PASS}: All tests passed!\n"
+fi